class TestCollectionSearch(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection()

    def test_add(self):
        coll2 = self.conn.create_collection('coll2')
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 1)
        coll2.drop()

    def test_delete(self):
        coll2 = self.conn.create_collection('coll2')
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 0)
        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertTrue(len(res.response.docs) == 0)
        coll2.drop()
class TestConnection(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_list(self):
        self.conn["foo"].create(**self.collparams)
        colls = self.conn.list()
        self.assertTrue(len(colls) >= 1)
        self.conn["foo"].drop()

    def test_live_nodes(self):
        nodes = self.conn.live_nodes  # to support easy use of solrcloud gettingstarted
        self.assertTrue(len(nodes) >= 1)

    def test_cluster_leader(self):
        leader = self.conn.cluster_leader
        self.assertTrue(leader is not None)

    def test_create_collection(self):
        coll = self.conn.create_collection("test2", **self.collparams)
        self.assertTrue(isinstance(coll, SolrCollection))
        self.conn.test2.drop()

    def test_create_collection_https(self):
        test_conn = SolrConnection(server="localhost", use_https=True)
        self.assertTrue(test_conn.url_template.startswith("https:"))
        test_conn = SolrConnection(server="localhost", use_https=False)
        self.assertTrue(test_conn.url_template.startswith("http:"))
        test_conn = SolrConnection(server="localhost")
        self.assertTrue(test_conn.url_template.startswith("http:"))
def __init__(self, table, core="collection1"): self.table = table self.core = core self.url = 'localhost:8983' try: self.interface = SolrConnection(self.url)[self.core] except Exception as e: logger.warning("Cannot connect to Solr: %s" % e) raise RuntimeError("Cannot connect to Solr: %s" % e)
def init(args):
    global solr_connection
    solr_connection = SolrConnection(args.solr)
    global solr_collection
    solr_collection = solr_connection[args.collection]
    global SOLR_UNIQUE_KEY
    SOLR_UNIQUE_KEY = args.solrIdField

    dc_policy = RoundRobinPolicy()
    token_policy = TokenAwarePolicy(dc_policy)

    if args.cassandraUsername and args.cassandraPassword:
        auth_provider = PlainTextAuthProvider(username=args.cassandraUsername,
                                              password=args.cassandraPassword)
    else:
        auth_provider = None

    global cassandra_cluster
    cassandra_cluster = Cluster(contact_points=args.cassandra,
                                port=args.cassandraPort,
                                protocol_version=int(args.cassandraProtocolVersion),
                                load_balancing_policy=token_policy,
                                auth_provider=auth_provider)
    global cassandra_session
    cassandra_session = cassandra_cluster.connect(keyspace=args.cassandraKeyspace)
    global cassandra_table
    cassandra_table = args.cassandraTable
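For context, a hedged sketch of the argument parser this init() implies. Only the attribute names (args.solr, args.collection, args.solrIdField and the cassandra* options) are taken from the function above; the defaults and help text are assumptions.

import argparse

# Hypothetical CLI wiring for init(); attribute names match those read
# above, but defaults and help strings are assumptions.
parser = argparse.ArgumentParser(description="Copy documents between Solr and Cassandra")
parser.add_argument("--solr", default="localhost:8983", help="Solr host:port")
parser.add_argument("--collection", required=True, help="Solr collection name")
parser.add_argument("--solrIdField", default="id", help="Solr unique key field")
parser.add_argument("--cassandra", nargs="+", default=["localhost"],
                    help="Cassandra contact points")
parser.add_argument("--cassandraPort", type=int, default=9042)
parser.add_argument("--cassandraProtocolVersion", default="4")
parser.add_argument("--cassandraKeyspace", required=True)
parser.add_argument("--cassandraTable", required=True)
parser.add_argument("--cassandraUsername", default=None)
parser.add_argument("--cassandraPassword", default=None)

init(parser.parse_args())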
class DuplicatesPipeline(object):
    def __init__(self):
        servers = SOLR_SERVERS
        self.cnn = SolrConnection(servers)[SOLR_COLLECTION_DEFAULT]
        self.cache_list = []

    def process_item(self, item, spider):
        if self.cnn.search(
            {"q": "report_link:%s" % item["report_link"].encode("utf-8")}
        ).result.response.numFound != 0 or self.cache_duplicate(item["report_link"]):
            raise DropItem("Duplicate item found: %s" % item["report_link"])
        else:
            if "report_revision_time_standard" in item:
                delta = datetime.timedelta(hours=8)
                dateTimezone = item["report_revision_time_standard"] - delta
                item["report_revision_time_standard"] = dateTimezone.strftime("%Y-%m-%dT%H:%M:%S") + "Z"
            return item

    def cache_duplicate(self, report_link):
        if report_link in self.cache_list:
            return True
        else:
            if len(self.cache_list) > SOLR_CACHE_MAX_ELEMENTS_PER_SPIDER:
                self.cache_list = []
            else:
                self.cache_list.append(report_link)
            return False
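A pipeline like this is activated through the Scrapy project's settings. A minimal sketch, assuming a module path of myproject.pipelines and placeholder values for the SOLR_* constants the pipeline imports; all of these are assumptions, not values from the original.

# settings.py -- hypothetical module path and placeholder values
ITEM_PIPELINES = {
    "myproject.pipelines.DuplicatesPipeline": 300,
}

# Constants imported by the pipeline; actual values depend on the deployment.
SOLR_SERVERS = ["localhost:8983"]
SOLR_COLLECTION_DEFAULT = "reports"
SOLR_CACHE_MAX_ELEMENTS_PER_SPIDER = 10000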
def get_connection():
    '''
    Get the Solr connection.

    :return: a SolrConnection pointed at the local Solr node
    '''
    connection = SolrConnection(["localhost:8983"])
    return connection
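A quick usage sketch for get_connection(); the collection name "collection1" is an assumption.

conn = get_connection()
print(conn.list())                    # names of the existing collections
coll = conn["collection1"]            # hypothetical collection name
res = coll.search({"q": "*:*", "rows": 1}).result
print(res.response.numFound)          # total matching documents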
class TestConnection(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection()

    def test_list(self):
        colls = self.conn.list()
        self.assertTrue(len(colls) >= 1)

    def test_live_nodes(self):
        nodes = self.conn.live_nodes
        self.assertTrue(len(nodes) == 1)

    def test_cluster_leader(self):
        leader = self.conn.cluster_leader
        self.assertTrue(leader is not None)

    def test_create_collection(self):
        coll = self.conn.create_collection('test2')
        self.assertTrue(isinstance(coll, SolrCollection))
        self.conn.test2.drop()
def disease_query(coll_name, disease_phewas_df):
    # check solr collection
    conn = SolrConnection(timeout=6000)
    pa_coll = conn[coll_name]
    print('Num disease: ', len(disease_phewas_df))

    # query solr with each disease name, serially
    disease_docidx_arr = []
    for disease_record in disease_phewas_df.iterrows():
        result_arr = solr_query(pa_coll, disease_record)
        for res in result_arr:
            disease_docidx_arr.append(res)
    return disease_docidx_arr
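disease_query() depends on a solr_query() helper that is not shown here. A minimal sketch of what such a helper could look like, assuming each record is the (index, row) pair produced by DataFrame.iterrows() and that the collection exposes a searchable "name" field matched against a "disease_name" column; both field names are assumptions.

def solr_query(coll, disease_record):
    # Hypothetical helper: unpack the (index, row) pair from
    # DataFrame.iterrows(); 'disease_name' and 'name' are assumed names.
    _, row = disease_record
    query = {"q": 'name:"%s"' % row["disease_name"], "rows": 10}
    result = coll.search(query).result
    return [doc["id"] for doc in result.response.docs]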
class TestCollectionAdmin(unittest.TestCase):
    def setUp(self):
        self.conn = Connection()

    def test_create_collection(self):
        coll2 = self.conn.create_collection('coll2')
        time.sleep(3)
        coll2.drop()
        time.sleep(3)

    def test_reload(self):
        coll2 = self.conn.create_collection('coll2')
        time.sleep(3)
        res = coll2.reload()
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_split_shard(self):
        coll2 = self.conn.create_collection('coll2')
        time.sleep(3)
        res = coll2.split_shard('shard1', ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_create_shard(self):
        coll2 = self.conn.create_collection('coll2', router_name='implicit',
                                            shards='myshard1', max_shards_per_node=3)
        time.sleep(3)
        res = coll2.create_shard('shard_my')
        time.sleep(3)
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_create_delete_alias(self):
        coll2 = self.conn.create_collection('coll2')
        coll2.create_alias('alias2')
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())
        coll2.delete_alias('alias2')
        coll2.drop()

    def test_delete_replica(self):
        coll2 = self.conn.create_collection('coll2', router_name='implicit',
                                            shards='myshard1', max_shards_per_node=6,
                                            replication_factor=2)
        time.sleep(3)
        coll2.delete_replica('core_node2', 'myshard1')
        coll2.drop()
def init(args):
    global solr_connection
    solr_connection = SolrConnection(args.solr)
    global solr_collection
    solr_collection = solr_connection[args.collection]

    dc_policy = RoundRobinPolicy()
    token_policy = TokenAwarePolicy(dc_policy)

    global cassandra_cluster
    cassandra_cluster = Cluster(contact_points=args.cassandra,
                                port=args.cassandraPort,
                                protocol_version=int(args.cassandraProtocolVersion),
                                load_balancing_policy=token_policy)
    global cassandra_session
    cassandra_session = cassandra_cluster.connect(keyspace=args.cassandraKeyspace)
    global cassandra_table
    cassandra_table = args.cassandraTable
def open_spider(self, spider):
    solr_collection_name = self.solr_collection_map.get(spider.name)
    if solr_collection_name:
        self.solr_collection_name = solr_collection_name
    if not self.solr_collection_name:
        spider.log("No collection associated with " + spider.name + "!",
                   level=log.CRITICAL)
        raise CloseSpider
    if self.solr_cloud_mode:
        from solrcloudpy import SolrConnection
        self.solr_connection = SolrConnection(
            server=self.solr_servers,
            detect_live_nodes=self.solr_detect_live_nodes,
            user=self.solr_user,
            password=self.solr_password,
            timeout=self.solr_timeout,
            webappdir=self.solr_web_app)
        self.solr_collection = self.solr_connection[self.solr_collection_name]
    else:
        from solr import Solr
        from urlparse import urljoin
        collection_url = reduce(urljoin, (self.solr_servers[0],
                                          self.solr_web_app,
                                          self.solr_collection_name))
        if isinstance(collection_url, unicode):
            collection_url = collection_url.encode("UTF-8")
        self.solr_collection = Solr(url=collection_url,
                                    http_user=self.solr_user,
                                    http_pass=self.solr_password,
                                    timeout=self.solr_timeout)
    if self.solr_cache_max_len > 0:
        max_len = self.solr_cache_max_len * 2
    else:
        max_len = 2
    self.cache_buffer[spider.name] = SpiderCache(maxlen=max_len)
    self.locks[spider.name] = Lock()
class TestCollectionSearch(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_add(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 1)
        coll2.drop()

    def test_delete(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 0)
        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertTrue(len(res.response.docs) == 0)
        coll2.drop()

    def test_custom_params_search(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        res_1 = coll2.add(docs, {"omitHeader": "false"})
        self.assertEqual(0, res_1.responseHeader.status)
        coll2.commit()
        res_2 = coll2.search({"q": "id:1", "omitHeader": "false"}).result
        self.assertEqual(0, res_2.responseHeader.status)

    def test_post_body_search(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        # JSON DSL query format
        res = coll2.search({}, "POST", '{"query": "id:1"}').result
        self.assertTrue(len(res.response.docs) == 1)
        coll2.drop()
class SolrBackend(object):
    def __init__(self, table, core="collection1"):
        self.table = table
        self.core = core
        self.url = 'localhost:8983'
        try:
            self.interface = SolrConnection(self.url)[self.core]
        except Exception as e:
            logger.warning("Cannot connect to Solr: %s" % e)
            raise RuntimeError("Cannot connect to Solr: %s" % e)

    def get_ids(self, queryset):
        return [r.id for r in queryset.select(self.table._id)]

    def indexes(self, *fieldnames):
        self.fieldnames = fieldnames

    def after_insert(self, fields, id):
        document = [{'id': id}]
        for name in self.fieldnames:
            if name in fields:
                document[0][name] = str(fields[name])
        self.interface.add(document)
        self.interface.commit()
        return True

    def after_update(self, queryset, fields):
        """
        Caveat: this should work, but only if ALL indexed fields
        are updated at once.
        """
        ids = self.get_ids(queryset)
        documents = []
        for id in ids:
            self.interface.delete({'q': 'id:%i' % id})
            document = {'id': id}
            for name in self.fieldnames:
                if name in fields:
                    document[name] = str(fields[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def index_table(self, query, db):
        for row in db(query).select():
            self.interface.delete({'q': 'id:%i' % row.id})
        self.interface.commit()
        documents = []
        for row in db(query).select():
            document = {'id': row.id}
            for name in self.fieldnames:
                document[name] = str(row[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def update(self, query, fields, db, **core_fields):
        rows = db(query).select(*fields)
        documents = []
        for row in rows:
            document = {}
            for key in row.keys():
                for core_field in core_fields:
                    if core_field in key:
                        document[key] = str(row[key])
                if key == 'id':
                    self.interface.delete({'q': 'id:%i' % row[key]})
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def before_delete(self, queryset):
        self.ids = self.get_ids(queryset)
        return False

    def after_delete(self, queryset):
        self.ids = self.get_ids(queryset)
        for id in self.ids:
            self.interface.delete({'q': 'id:%i' % id})
        self.interface.commit()
        return True

    def meta_search(self, limit, offset, mode, compact, sort, **fieldkeys):
        query = ''
        items = len(fieldkeys)
        count = 0
        # Convert to solrcloudpy search
        for fieldkey in fieldkeys:
            query += " %s:%s " % (fieldkey, fieldkeys[fieldkey])
            count += 1
            if items > 1 and count < items:
                query += mode
        se = SearchOptions()
        se.commonparams.q(query).rows(limit).sort(sort).start(offset)
        response = self.interface.search(se)
        if compact:
            return [r['id'] for r in response.result['response'].docs]
        return response.result['response']
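A hedged usage sketch for SolrBackend: registering indexed fields and pushing a freshly inserted row. The db handle, the article table, and its field names are assumptions for illustration.

# Hypothetical wiring; db, the article table, and its fields are assumptions.
backend = SolrBackend(db.article)
backend.indexes('title', 'body')

new_id = db.article.insert(title='hello', body='index me')
backend.after_insert({'title': 'hello', 'body': 'index me'}, new_id)

# Search the indexed fields back, returning only document ids.
hits = backend.meta_search(limit=10, offset=0, mode='AND',
                           compact=True, sort='id asc', title='hello')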
class TestCollectionAdmin(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_create_collection(self):
        original_count = len(self.conn.list())
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 1)
        self.conn.list()
        time.sleep(3)
        coll3 = self.conn.create_collection("coll3", **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 2)
        # todo: calling state here means the integration works, but what should we assert?
        coll2.state
        coll2.drop()
        self.assertEqual(len(self.conn.list()), original_count + 1)
        time.sleep(3)
        coll3.drop()
        self.assertEqual(len(self.conn.list()), original_count)

    def test_reload(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.reload()
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_split_shard(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.split_shard("shard1", ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_shard(self):
        coll2 = self.conn.create_collection(
            "coll2",
            router_name="implicit",
            shards="myshard1",
            max_shards_per_node=3,
            **self.collparams
        )
        time.sleep(3)
        res = coll2.create_shard("shard_my")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_delete_alias(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        coll2.create_alias("alias2")
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())
        coll2.delete_alias("alias2")
        coll2.drop()

    def test_delete_replica(self):
        try:
            coll2 = self.conn.create_collection(
                "test_delete_replica",
                router_name="implicit",
                shards="myshard1",
                max_shards_per_node=6,
                replication_factor=2,
                **self.collparams
            )
        except ReadTimeout:
            print("Encountered read timeout while testing delete replica")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            coll2 = self.conn["test_delete_replica"]
        time.sleep(3)
        coll2.delete_replica("core_node2", "myshard1")
        coll2.drop()
class SolrBackend(object):
    def __init__(self, table, core="collection1"):
        self.table = table
        self.core = core
        self.url = 'localhost:8983'
        try:
            self.interface = SolrConnection(self.url)[self.core]
        except Exception as e:
            logger.warning("Cannot connect to Solr: %s" % e)
            raise RuntimeError("Cannot connect to Solr: %s" % e)

    def get_ids(self, queryset):
        return [r.id for r in queryset.select(self.table._id)]

    def indexes(self, *fieldnames):
        self.fieldnames = fieldnames

    def after_insert(self, fields, id):
        document = [{'id': id}]
        for name in self.fieldnames:
            if name in fields:
                document[0][name] = str(fields[name])
        self.interface.add(document)
        self.interface.commit()
        return True

    def after_update(self, queryset, fields):
        """
        Caveat: this should work, but only if ALL indexed fields
        are updated at once.
        """
        ids = self.get_ids(queryset)
        documents = []
        for id in ids:
            self.interface.delete(id)
            document = {'id': id}
            for name in self.fieldnames:
                if name in fields:
                    document[name] = str(fields[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def update(self, query, fields, db, **core_fields):
        '''
        Usage:
        '''
        rows = db(query).select(*fields)
        documents = []
        for row in rows:
            document = {}
            for key in row.keys():
                for core_field in core_fields:
                    if core_field in row[key]:
                        document[core_fields[core_field]] = str(row[key][core_field])
                    if core_field == 'id':
                        self.interface.delete(row[key][core_field])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def before_delete(self, queryset):
        self.ids = self.get_ids(queryset)
        return False

    def after_delete(self):
        for id in self.ids:
            self.interface.delete(id=id)
        self.interface.commit()
        return True

    def meta_search(self, limit, offset, mode, compact, sort, **fieldkeys):
        query = ''
        items = len(fieldkeys)
        count = 0
        # Convert to solrcloudpy search
        for fieldkey in fieldkeys:
            query += " %s:%s " % (fieldkey, fieldkeys[fieldkey])
            count += 1
            if items > 1 and count < items:
                query += mode
        se = SearchOptions()
        se.commonparams.q(query).rows(limit).sort(sort).start(offset)
        print(se)
        response = self.interface.search(se)
        if compact:
            return [r['id'] for r in response.result['response'].docs]
        return response.result['response']
def _init_connection(self):
    self.conn = SolrConnection(self.nodes, version=self.version, timeout=6000)
class QueueSolr(object):
    """SolrDB connection module."""

    def __init__(self, table, server=None, **kwargs):
        """
        :param server: list or str
        :param table: collection
        :param kwargs: detect_live_nodes=False, user=None, password=None,
                       timeout=10, webappdir='solr'
        :return:
        """
        if 'host' in kwargs:
            del kwargs['host']
        if 'port' in kwargs:
            del kwargs['port']
        self.conn = SolrConnection(server=server, **kwargs)
        self.table = table
        self.collection = self.conn[table]

    @QueueBase.catch
    def collections(self):
        """
        Get the list of all collections.
        :return:
        """
        return self.conn.list()

    @QueueBase.catch
    def find(self, *args, **kwargs):
        """
        Search by field within a collection. The queried fields must be
        indexed, otherwise Solr returns a 400 error.
        :param args:
        :param kwargs:
        :return:
        """
        for dict_ in args:
            if isinstance(dict_, dict):
                kwargs.update(dict_)
        valuess = ' AND '.join('%s:%s' % (k, v) for k, v in kwargs.items())
        q_item = {'q': valuess}
        # q_item = SearchOptions().commonparams.q(valuess)
        return self.collection.search(q_item).result['response']

    @QueueBase.catch
    def update(self, *args, **kwargs):
        """
        Update data. The updated fields must already exist in the collection
        (dynamic *_temp fields also work).
        :param args:
        :param kwargs:
        :return:
        """
        for dict_ in args:
            if isinstance(dict_, dict):
                kwargs.update(dict_)
        self.collection.add([kwargs])
        log.info(u"%s Storage success!" % json.dumps(kwargs))

    @QueueBase.catch
    def delete(self, *args, **kwargs):
        """
        Delete data. The fields used for deletion must also exist in the
        collection, otherwise: 400 Client Error: Bad Request.
        :param args:
        :param kwargs:
        :return:
        """
        for dict_ in args:
            if isinstance(dict_, dict):
                kwargs.update(dict_)
        valuess = ' AND '.join('%s:%s' % (k, v) for k, v in kwargs.items())
        q_item = {'q': valuess}
        self.collection.delete(q_item, commit=False)
        log.info(u"%s deleted!" % json.dumps(kwargs))
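A hedged usage sketch for QueueSolr; the server address, collection name, and field names are assumptions.

# Hypothetical usage; server, collection, and field names are assumptions.
queue = QueueSolr('my_collection', server='localhost:8983')
print(queue.collections())                        # all collection names
queue.update({'id': '42', 'status': 'pending'})   # add/overwrite a document
found = queue.find(status='pending')              # queried fields must be indexed
queue.delete(id='42')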
class TestCollectionAdmin(unittest.TestCase):
    def setUp(self):
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_create_collection(self):
        original_count = len(self.conn.list())
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), original_count + 1)
        self.conn.list()
        time.sleep(3)
        coll3 = self.conn.create_collection("coll3", **self.collparams)
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), original_count + 2)
        # todo: calling state here means the integration works, but what should we assert?
        coll2.state
        coll2.drop()
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), original_count + 1)
        time.sleep(3)
        coll3.drop()
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), original_count)

    def test_reload(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.reload()
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_split_shard(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.split_shard("shard1", ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_shard(self):
        coll2 = self.conn.create_collection("coll2", router_name="implicit",
                                            shards="myshard1", max_shards_per_node=3,
                                            **self.collparams)
        time.sleep(3)
        res = coll2.create_shard("shard_my")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_delete_alias(self):
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        coll2.create_alias("alias2")
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())
        coll2.delete_alias("alias2")
        coll2.drop()

    def test_delete_replica(self):
        try:
            coll2 = self.conn.create_collection("test_delete_replica",
                                                router_name="implicit",
                                                shards="myshard1",
                                                max_shards_per_node=6,
                                                replication_factor=2,
                                                **self.collparams)
        except ReadTimeout:
            print("Encountered read timeout while testing delete replica")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            coll2 = self.conn["test_delete_replica"]
        time.sleep(3)
        firstReplica = list(coll2.shards["shards"]["myshard1"]["replicas"].dict.keys())[0]
        result = coll2.delete_replica(firstReplica, "myshard1")
        self.assertTrue(result.success)
        coll2.drop()
import os

from solrcloudpy import SolrConnection

connection = SolrConnection(["localhost:8983", "localhost:7574"],
                            version=os.getenv("SOLR_VERSION", "7.7.0"))

for collection_name in connection.list():
    print("Dropping %s" % collection_name)
    connection[collection_name].drop()