Пример #1
0
 def test_create_collection_https(self):
     """Check that ``use_https`` selects the scheme of ``url_template``."""
     # (extra constructor kwargs, expected URL prefix); the last case checks
     # that omitting the flag is expected to yield plain HTTP.
     cases = (
         ({"use_https": True}, "https:"),
         ({"use_https": False}, "http:"),
         ({}, "http:"),
     )
     for extra_kwargs, expected_prefix in cases:
         test_conn = SolrConnection(server="localhost", **extra_kwargs)
         self.assertTrue(test_conn.url_template.startswith(expected_prefix))
Пример #2
0
class TestConnection(unittest.TestCase):
    """Exercises cluster-level operations of ``SolrConnection``."""

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def test_list(self):
        """``list()`` reports at least one collection once ``foo`` exists."""
        self.conn['foo'].create(**self.collparams)
        # Registered as a cleanup so ``foo`` is dropped even when the
        # assertion below fails instead of being left behind.
        self.addCleanup(lambda: self.conn['foo'].drop())
        colls = self.conn.list()
        self.assertGreaterEqual(len(colls), 1)

    def test_live_nodes(self):
        """At least one live node is reported."""
        nodes = self.conn.live_nodes
        # to support easy use of solrcloud gettingstarted
        self.assertGreaterEqual(len(nodes), 1)

    def test_cluster_leader(self):
        """A cluster leader is reported."""
        leader = self.conn.cluster_leader
        self.assertIsNotNone(leader)

    def test_create_collection(self):
        """``create_collection`` returns a ``SolrCollection``."""
        coll = self.conn.create_collection('test2', **self.collparams)
        # Attribute-style lookup is kept for the drop, as in the original
        # test, but now runs even if the type assertion fails.
        self.addCleanup(lambda: self.conn.test2.drop())
        self.assertIsInstance(coll, SolrCollection)
Пример #3
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding, searching and deleting documents in a collection."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def _populated_coll2(self):
        """Create ``coll2`` holding five committed docs with ids "0".."4".

        The collection is dropped through a registered cleanup, so a failing
        assertion no longer leaves ``coll2`` behind.
        """
        coll2 = self.conn.create_collection('coll2')
        self.addCleanup(coll2.drop)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        return coll2

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self._populated_coll2()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)

    def test_delete(self):
        """Deleting works with a ``SearchOptions`` object and with a dict."""
        coll2 = self._populated_coll2()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertEqual(len(res.response.docs), 0)
Пример #4
0
class TestConnection(unittest.TestCase):
    """Exercises cluster-level operations of ``SolrConnection``."""

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_list(self):
        """``list()`` reports at least one collection once ``foo`` exists."""
        self.conn["foo"].create(**self.collparams)
        # Registered as a cleanup so ``foo`` is dropped even when the
        # assertion below fails instead of being left behind.
        self.addCleanup(lambda: self.conn["foo"].drop())
        colls = self.conn.list()
        self.assertGreaterEqual(len(colls), 1)

    def test_live_nodes(self):
        """At least one live node is reported."""
        nodes = self.conn.live_nodes
        # to support easy use of solrcloud gettingstarted
        self.assertGreaterEqual(len(nodes), 1)

    def test_cluster_leader(self):
        """A cluster leader is reported."""
        leader = self.conn.cluster_leader
        self.assertIsNotNone(leader)

    def test_create_collection(self):
        """``create_collection`` returns a ``SolrCollection``."""
        coll = self.conn.create_collection("test2", **self.collparams)
        # Attribute-style lookup is kept for the drop, as in the original
        # test, but now runs even if the type assertion fails.
        self.addCleanup(lambda: self.conn.test2.drop())
        self.assertIsInstance(coll, SolrCollection)

    def test_create_collection_https(self):
        """``use_https`` selects the scheme of ``url_template``."""
        test_conn = SolrConnection(server="localhost", use_https=True)
        self.assertTrue(test_conn.url_template.startswith("https:"))
        test_conn = SolrConnection(server="localhost", use_https=False)
        self.assertTrue(test_conn.url_template.startswith("http:"))
        # Omitting the flag is expected to yield plain HTTP.
        test_conn = SolrConnection(server="localhost")
        self.assertTrue(test_conn.url_template.startswith("http:"))
Пример #5
0
 def __init__(self, table, core="collection1"):
     """Remember the table and bind ``self.interface`` to a Solr core.

     :param table: table object stored on the instance as ``self.table``
     :param core: name looked up on the Solr connection
     :raises RuntimeError: when opening the connection or looking up the
         core raises any exception; the failure is also logged as a warning
     """
     self.table, self.core = table, core
     self.url = 'localhost:8983'
     try:
         connection = SolrConnection(self.url)
         self.interface = connection[self.core]
     except Exception as e:
         # Format once so the log line and the raised error carry the
         # same text.
         message = "Cannot connect to Solr: %s" % e
         logger.warning(message)
         raise RuntimeError(message)
Пример #6
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding, searching and deleting documents in a collection."""

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def _new_coll2(self):
        """Create ``coll2`` and schedule its removal for the end of the test.

        Using a cleanup (rather than a trailing ``drop()``) removes the
        collection even when an assertion fails part-way through a test.
        """
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        self.addCleanup(coll2.drop)
        return coll2

    @staticmethod
    def _sample_docs():
        """Return five documents with string ids "0" to "4"."""
        return [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self._new_coll2()
        coll2.add(self._sample_docs())
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)

    def test_delete(self):
        """Deleting works with a ``SearchOptions`` object and with a dict."""
        coll2 = self._new_coll2()
        coll2.add(self._sample_docs())
        coll2.commit()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertEqual(len(res.response.docs), 0)

    def test_custom_params_search(self):
        """Extra request parameters are accepted by ``add`` and ``search``."""
        # The original test never dropped ``coll2``; the helper's cleanup
        # now removes it.
        coll2 = self._new_coll2()

        res_1 = coll2.add(self._sample_docs(), {'omitHeader': "false"})
        # assertEqual: the assertEquals alias is deprecated and removed in
        # Python 3.12.
        self.assertEqual(0, res_1.responseHeader.status)

        coll2.commit()
        res_2 = coll2.search({"q": "id:1", "omitHeader": "false"}).result
        self.assertEqual(0, res_2.responseHeader.status)
Пример #7
0
class DuplicatesPipeline(object):
    """Item pipeline that drops items whose ``report_link`` was already seen.

    A link counts as seen when a Solr search for it returns at least one
    document, or when it is present in this instance's in-memory cache.
    """

    def __init__(self):
        """Bind to the default Solr collection and start with an empty cache."""
        servers = SOLR_SERVERS
        self.cnn = SolrConnection(servers)[SOLR_COLLECTION_DEFAULT]
        # Links already let through by this pipeline instance.
        self.cache_list = []

    def process_item(self, item, spider):
        """Return ``item`` unless its ``report_link`` is a duplicate.

        :param item: mapping with a ``report_link`` entry and, optionally, a
            ``report_revision_time_standard`` datetime
        :param spider: unused here
        :return: the item; ``report_revision_time_standard`` (when present)
            is replaced by a ``%Y-%m-%dT%H:%M:%SZ`` string
        :raises DropItem: when the link is found in Solr or in the cache
        """
        link = item['report_link']
        # Solr is consulted first; the cache is only checked (and updated)
        # when Solr reports no match, exactly as before.
        found_in_solr = self.cnn.search(
            {"q": 'report_link:%s' % link.encode('utf-8')}
        ).result.response.numFound != 0
        if found_in_solr or self.cache_duplicate(link):
            raise DropItem("Duplicate item found: %s" % link)

        if "report_revision_time_standard" in item:
            # Shift back by 8 hours and label the result "Z".
            # NOTE(review): presumably converts UTC+8 local time to UTC -
            # confirm with the producer of this field.
            delta = datetime.timedelta(hours=8)
            shifted = item["report_revision_time_standard"] - delta
            item["report_revision_time_standard"] = shifted.strftime(
                '%Y-%m-%dT%H:%M:%S') + "Z"
        return item

    def cache_duplicate(self, report_link):
        """Report whether ``report_link`` is cached, caching it if it is not.

        :return: ``True`` if the link was already cached, else ``False``
        """
        if report_link in self.cache_list:
            return True
        if len(self.cache_list) > SOLR_CACHE_MAX_ELEMENTS_PER_SPIDER:
            # Cache is over its limit: start afresh.
            self.cache_list = []
        # Always remember the current link. Previously the link that
        # triggered the reset was discarded and never cached.
        self.cache_list.append(report_link)
        return False
def init(args):
    """Populate the module-level Solr and Cassandra handles from ``args``.

    Sets the globals ``solr_connection``, ``solr_collection``,
    ``SOLR_UNIQUE_KEY``, ``cassandra_cluster``, ``cassandra_session`` and
    ``cassandra_table``.

    :param args: object exposing ``solr``, ``collection``, ``solrIdField``,
        ``cassandra``, ``cassandraPort``, ``cassandraProtocolVersion``,
        ``cassandraKeyspace``, ``cassandraTable`` and the optional
        ``cassandraUsername`` / ``cassandraPassword`` credentials
    """
    global solr_connection, solr_collection, SOLR_UNIQUE_KEY
    global cassandra_cluster, cassandra_session, cassandra_table

    solr_connection = SolrConnection(args.solr)
    solr_collection = solr_connection[args.collection]
    SOLR_UNIQUE_KEY = args.solrIdField

    token_policy = TokenAwarePolicy(RoundRobinPolicy())

    # Authenticate only when both a username and a password were supplied.
    auth_provider = (
        PlainTextAuthProvider(username=args.cassandraUsername,
                              password=args.cassandraPassword)
        if args.cassandraUsername and args.cassandraPassword
        else None
    )

    cluster_options = dict(
        contact_points=args.cassandra,
        port=args.cassandraPort,
        protocol_version=int(args.cassandraProtocolVersion),
        load_balancing_policy=token_policy,
        auth_provider=auth_provider,
    )
    cassandra_cluster = Cluster(**cluster_options)
    cassandra_session = cassandra_cluster.connect(
        keyspace=args.cassandraKeyspace)
    cassandra_table = args.cassandraTable
Пример #9
0
class DuplicatesPipeline(object):
    """Item pipeline that drops items whose ``report_link`` was already seen.

    A link counts as seen when a Solr search for it returns at least one
    document, or when it is present in this instance's in-memory cache.
    """

    def __init__(self):
        """Bind to the default Solr collection and start with an empty cache."""
        servers = SOLR_SERVERS
        self.cnn = SolrConnection(servers)[SOLR_COLLECTION_DEFAULT]
        # Links already let through by this pipeline instance.
        self.cache_list = []

    def process_item(self, item, spider):
        """Return ``item`` unless its ``report_link`` is a duplicate.

        :param item: mapping with a ``report_link`` entry and, optionally, a
            ``report_revision_time_standard`` datetime
        :param spider: unused here
        :return: the item; ``report_revision_time_standard`` (when present)
            is replaced by a ``%Y-%m-%dT%H:%M:%SZ`` string
        :raises DropItem: when the link is found in Solr or in the cache
        """
        link = item["report_link"]
        # Solr is consulted first; the cache is only checked (and updated)
        # when Solr reports no match, exactly as before.
        found_in_solr = self.cnn.search(
            {"q": "report_link:%s" % link.encode("utf-8")}
        ).result.response.numFound != 0
        if found_in_solr or self.cache_duplicate(link):
            raise DropItem("Duplicate item found: %s" % link)

        if "report_revision_time_standard" in item:
            # Shift back by 8 hours and label the result "Z".
            # NOTE(review): presumably converts UTC+8 local time to UTC -
            # confirm with the producer of this field.
            delta = datetime.timedelta(hours=8)
            shifted = item["report_revision_time_standard"] - delta
            item["report_revision_time_standard"] = shifted.strftime("%Y-%m-%dT%H:%M:%S") + "Z"
        return item

    def cache_duplicate(self, report_link):
        """Report whether ``report_link`` is cached, caching it if it is not.

        :return: ``True`` if the link was already cached, else ``False``
        """
        if report_link in self.cache_list:
            return True
        if len(self.cache_list) > SOLR_CACHE_MAX_ELEMENTS_PER_SPIDER:
            # Cache is over its limit: start afresh.
            self.cache_list = []
        # Always remember the current link. Previously the link that
        # triggered the reset was discarded and never cached.
        self.cache_list.append(report_link)
        return False
Пример #10
0
def get_connection(servers=None):
    '''
    Get the solr connection.

    :param servers: server address(es) handed to ``SolrConnection``; when
        omitted, the single local node ``["localhost:8983"]`` is used, which
        matches the previous hard-coded behaviour
    :return: a newly constructed ``SolrConnection``
    '''
    if servers is None:
        # Build the default per call so no list is shared between calls.
        servers = ["localhost:8983"]
    return SolrConnection(servers)
Пример #11
0
 def __init__(self, table, server=None, **kwargs):
     """
     Open a Solr connection and bind ``self.collection`` to ``table``.

     :param table: collection
     :param server: list or str
     :param kwargs: forwarded to ``SolrConnection`` after ``host`` and
                    ``port`` are removed; the original documentation lists
                      detect_live_nodes=False,
                      user=None,
                      password=None,
                      timeout=10,
                      webappdir='solr'
     :return:
     """
     # ``host`` / ``port`` are discarded rather than passed through.
     for dropped_key in ('host', 'port'):
         kwargs.pop(dropped_key, None)
     self.conn = SolrConnection(server=server, **kwargs)
     self.table = table
     self.collection = self.conn[table]
Пример #12
0
 def __init__(self, table, core="collection1"):
     """Remember the table and bind ``self.interface`` to a Solr core.

     :param table: table object stored on the instance as ``self.table``
     :param core: name looked up on the Solr connection
     :raises RuntimeError: when opening the connection or looking up the
         core raises any exception; the failure is also logged as a warning
     """
     self.table, self.core = table, core
     self.url = 'localhost:8983'
     try:
         connection = SolrConnection(self.url)
         self.interface = connection[self.core]
     except Exception as e:
         # Format once so the log line and the raised error carry the
         # same text.
         message = "Cannot connect to Solr: %s" % e
         logger.warning(message)
         raise RuntimeError(message)
Пример #13
0
class TestConnection(unittest.TestCase):
    """Exercises cluster-level operations of ``SolrConnection``."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def test_list(self):
        """At least one collection is listed."""
        colls = self.conn.list()
        self.assertGreaterEqual(len(colls), 1)

    def test_live_nodes(self):
        """Exactly one live node is reported."""
        nodes = self.conn.live_nodes
        self.assertEqual(len(nodes), 1)

    def test_cluster_leader(self):
        """A cluster leader is reported."""
        leader = self.conn.cluster_leader
        self.assertIsNotNone(leader)

    def test_create_collection(self):
        """``create_collection`` returns a ``SolrCollection``."""
        coll = self.conn.create_collection('test2')
        # Dropped through a cleanup so a failed assertion does not leave
        # ``test2`` behind; the attribute-style lookup is kept.
        self.addCleanup(lambda: self.conn.test2.drop())
        self.assertIsInstance(coll, SolrCollection)
Пример #14
0
class TestConnection(unittest.TestCase):
    """Exercises cluster-level operations of ``SolrConnection``."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def test_list(self):
        """At least one collection is listed."""
        colls = self.conn.list()
        self.assertGreaterEqual(len(colls), 1)

    def test_live_nodes(self):
        """Exactly one live node is reported."""
        nodes = self.conn.live_nodes
        self.assertEqual(len(nodes), 1)

    def test_cluster_leader(self):
        """A cluster leader is reported."""
        leader = self.conn.cluster_leader
        self.assertIsNotNone(leader)

    def test_create_collection(self):
        """``create_collection`` returns a ``SolrCollection``."""
        coll = self.conn.create_collection('test2')
        # Dropped through a cleanup so a failed assertion does not leave
        # ``test2`` behind; the attribute-style lookup is kept.
        self.addCleanup(lambda: self.conn.test2.drop())
        self.assertIsInstance(coll, SolrCollection)
Пример #15
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding, searching and deleting documents in a collection."""

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def _populated_coll2(self):
        """Create ``coll2`` holding five committed docs with ids "0".."4".

        The collection is dropped through a registered cleanup, so a failing
        assertion no longer leaves ``coll2`` behind.
        """
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        self.addCleanup(coll2.drop)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        return coll2

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self._populated_coll2()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)

    def test_delete(self):
        """Deleting works with a ``SearchOptions`` object and with a dict."""
        coll2 = self._populated_coll2()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertEqual(len(res.response.docs), 0)
Пример #16
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding and searching documents in a collection."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self.conn.create_collection('coll2')
        # Dropped through a cleanup so a failure in any later step does not
        # leave ``coll2`` behind.
        self.addCleanup(coll2.drop)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        coll2.add(docs)
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)
Пример #17
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding and searching documents in a collection."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self.conn.create_collection('coll2')
        # Dropped through a cleanup so a failure in any later step does not
        # leave ``coll2`` behind.
        self.addCleanup(coll2.drop)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        coll2.add(docs)
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)
Пример #18
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding, searching and deleting documents in a collection."""

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = SolrConnection()

    def _populated_coll2(self):
        """Create ``coll2`` holding five committed docs with ids "0".."4".

        The collection is dropped through a registered cleanup, so a failing
        assertion no longer leaves ``coll2`` behind.
        """
        coll2 = self.conn.create_collection('coll2')
        self.addCleanup(coll2.drop)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]
        coll2.add(docs)
        coll2.commit()
        return coll2

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self._populated_coll2()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)

    def test_delete(self):
        """Deleting works with a ``SearchOptions`` object and with a dict."""
        coll2 = self._populated_coll2()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertEqual(len(res.response.docs), 0)
Пример #19
0
def disease_query(coll_name, disease_phewas_df):
    """Query a Solr collection once per row of ``disease_phewas_df``.

    Each item yielded by ``disease_phewas_df.iterrows()`` is passed unchanged
    to ``solr_query`` together with the collection, and all returned results
    are concatenated in row order.

    :param coll_name: name of the collection looked up on the connection
    :param disease_phewas_df: object supporting ``len()`` and ``iterrows()``
    :return: flat list of everything ``solr_query`` returned
    """
    # check solr collection
    conn = SolrConnection(timeout=6000)
    pa_coll = conn[coll_name]
    print('Num disease: ', len(disease_phewas_df))

    # query solr with each disease name, serially
    disease_docidx_arr = []
    for disease_record in disease_phewas_df.iterrows():
        disease_docidx_arr.extend(solr_query(pa_coll, disease_record))
    return disease_docidx_arr
Пример #20
0
class TestCollectionAdmin(unittest.TestCase):
    """Exercises collection administration: reload, shards, aliases, replicas.

    The ``time.sleep(3)`` pauses are kept from the original tests.
    NOTE(review): presumably they give the cluster time to settle between
    admin calls - confirm before shortening them.
    """

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = Connection()

    def _create_coll2(self, **params):
        """Create ``coll2`` and schedule its removal for the end of the test.

        A cleanup replaces the trailing ``drop()`` so the collection is
        removed even when an earlier step of the test fails.
        """
        coll2 = self.conn.create_collection('coll2', **params)
        self.addCleanup(coll2.drop)
        return coll2

    def test_create_collection(self):
        """A collection can be created and dropped again."""
        # Dropping is the subject of this test, so it stays inline.
        coll2 = self.conn.create_collection('coll2')
        time.sleep(3)
        coll2.drop()
        time.sleep(3)

    def test_reload(self):
        """``reload`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.reload()
        self.assertIsNotNone(res.success)

    def test_split_shard(self):
        """``split_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.split_shard('shard1',
                                ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_shard(self):
        """``create_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2(router_name='implicit',
                                   shards='myshard1',
                                   max_shards_per_node=3)
        time.sleep(3)
        res = coll2.create_shard('shard_my')
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_delete_alias(self):
        """An alias created for a collection is reported as an alias."""
        coll2 = self._create_coll2()
        coll2.create_alias('alias2')
        # Cleanups run last-in-first-out: the alias is deleted before the
        # collection is dropped, matching the original order.
        self.addCleanup(coll2.delete_alias, 'alias2')
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())

    def test_delete_replica(self):
        """A replica can be deleted from a shard."""
        coll2 = self._create_coll2(router_name='implicit',
                                   shards='myshard1',
                                   max_shards_per_node=6,
                                   replication_factor=2)
        time.sleep(3)
        coll2.delete_replica('core_node2', 'myshard1')
Пример #21
0
class TestCollectionAdmin(unittest.TestCase):
    """Exercises collection administration: reload, shards, aliases, replicas.

    The ``time.sleep(3)`` pauses are kept from the original tests.
    NOTE(review): presumably they give the cluster time to settle between
    admin calls - confirm before shortening them.
    """

    def setUp(self):
        """Open a connection with default settings."""
        self.conn = Connection()

    def _create_coll2(self, **params):
        """Create ``coll2`` and schedule its removal for the end of the test.

        A cleanup replaces the trailing ``drop()`` so the collection is
        removed even when an earlier step of the test fails.
        """
        coll2 = self.conn.create_collection('coll2', **params)
        self.addCleanup(coll2.drop)
        return coll2

    def test_create_collection(self):
        """A collection can be created and dropped again."""
        # Dropping is the subject of this test, so it stays inline.
        coll2 = self.conn.create_collection('coll2')
        time.sleep(3)
        coll2.drop()
        time.sleep(3)

    def test_reload(self):
        """``reload`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.reload()
        self.assertIsNotNone(res.success)

    def test_split_shard(self):
        """``split_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.split_shard('shard1', ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_shard(self):
        """``create_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2(router_name='implicit',
                                   shards='myshard1', max_shards_per_node=3)
        time.sleep(3)
        res = coll2.create_shard('shard_my')
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_delete_alias(self):
        """An alias created for a collection is reported as an alias."""
        coll2 = self._create_coll2()
        coll2.create_alias('alias2')
        # Cleanups run last-in-first-out: the alias is deleted before the
        # collection is dropped, matching the original order.
        self.addCleanup(coll2.delete_alias, 'alias2')
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())

    def test_delete_replica(self):
        """A replica can be deleted from a shard."""
        coll2 = self._create_coll2(router_name='implicit',
                                   shards='myshard1',
                                   max_shards_per_node=6,
                                   replication_factor=2)
        time.sleep(3)
        coll2.delete_replica('core_node2', 'myshard1')
Пример #22
0
def init(args):
    """Populate the module-level Solr and Cassandra handles from ``args``.

    Sets the globals ``solr_connection``, ``solr_collection``,
    ``cassandra_cluster``, ``cassandra_session`` and ``cassandra_table``.

    :param args: object exposing ``solr``, ``collection``, ``cassandra``,
        ``cassandraPort``, ``cassandraProtocolVersion``,
        ``cassandraKeyspace`` and ``cassandraTable``
    """
    global solr_connection, solr_collection
    global cassandra_cluster, cassandra_session, cassandra_table

    solr_connection = SolrConnection(args.solr)
    solr_collection = solr_connection[args.collection]

    token_policy = TokenAwarePolicy(RoundRobinPolicy())

    cluster_options = dict(
        contact_points=args.cassandra,
        port=args.cassandraPort,
        protocol_version=int(args.cassandraProtocolVersion),
        load_balancing_policy=token_policy,
    )
    cassandra_cluster = Cluster(**cluster_options)
    cassandra_session = cassandra_cluster.connect(
        keyspace=args.cassandraKeyspace)
    cassandra_table = args.cassandraTable
Пример #23
0
    def open_spider(self, spider):
        """Bind a Solr collection, cache buffer and lock to ``spider``.

        The collection name mapped to ``spider.name`` (if any) replaces the
        current ``self.solr_collection_name``. Without a collection name the
        spider is closed. Depending on ``self.solr_cloud_mode`` the
        collection handle comes from ``solrcloudpy`` or from ``solr.Solr``.

        :param spider: object providing ``name`` and ``log``
        :raises CloseSpider: when no collection name is available
        """
        mapped_name = self.solr_collection_map.get(spider.name)
        if mapped_name:
            self.solr_collection_name = mapped_name
        if not self.solr_collection_name:
            spider.log("No collection associated with " + spider.name + "!",
                       level=log.CRITICAL)
            raise CloseSpider

        if self.solr_cloud_mode:
            from solrcloudpy import SolrConnection

            connection_options = dict(
                server=self.solr_servers,
                detect_live_nodes=self.solr_detect_live_nodes,
                user=self.solr_user,
                password=self.solr_password,
                timeout=self.solr_timeout,
                webappdir=self.solr_web_app,
            )
            self.solr_connection = SolrConnection(**connection_options)
            self.solr_collection = self.solr_connection[
                self.solr_collection_name]
        else:
            from solr import Solr
            from urlparse import urljoin

            # Only the first configured server is used in this mode.
            # NOTE(review): urljoin replaces the last path segment of a base
            # without a trailing slash - confirm the configured server and
            # web-app values end with "/" so no segment is lost.
            url_parts = (self.solr_servers[0], self.solr_web_app,
                         self.solr_collection_name)
            collection_url = reduce(urljoin, url_parts)
            if isinstance(collection_url, unicode):
                collection_url = collection_url.encode("UTF-8")
            self.solr_collection = Solr(url=collection_url,
                                        http_user=self.solr_user,
                                        http_pass=self.solr_password,
                                        timeout=self.solr_timeout)

        # The buffer holds twice the configured cache length, minimum 2.
        configured_len = self.solr_cache_max_len
        max_len = configured_len * 2 if configured_len > 0 else 2
        self.cache_buffer[spider.name] = SpiderCache(maxlen=max_len)
        self.locks[spider.name] = Lock()
Пример #24
0
class TestCollectionSearch(unittest.TestCase):
    """Exercises adding, searching and deleting documents in a collection."""

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def _new_coll2(self):
        """Create ``coll2`` and schedule its removal for the end of the test.

        Using a cleanup (rather than a trailing ``drop()``) removes the
        collection even when an assertion fails part-way through a test.
        """
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        self.addCleanup(coll2.drop)
        return coll2

    @staticmethod
    def _sample_docs():
        """Return five documents with string ids "0" to "4"."""
        return [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

    def test_add(self):
        """An added and committed document is found by id."""
        coll2 = self._new_coll2()
        coll2.add(self._sample_docs())
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 1)

    def test_delete(self):
        """Deleting works with a ``SearchOptions`` object and with a dict."""
        coll2 = self._new_coll2()
        coll2.add(self._sample_docs())
        coll2.commit()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertEqual(len(res.response.docs), 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertEqual(len(res.response.docs), 0)

    def test_custom_params_search(self):
        """Extra request parameters are accepted by ``add`` and ``search``."""
        # The original test never dropped ``coll2``; the helper's cleanup
        # now removes it.
        coll2 = self._new_coll2()

        res_1 = coll2.add(self._sample_docs(), {"omitHeader": "false"})
        self.assertEqual(0, res_1.responseHeader.status)

        coll2.commit()
        res_2 = coll2.search({"q": "id:1", "omitHeader": "false"}).result
        self.assertEqual(0, res_2.responseHeader.status)

    def test_post_body_search(self):
        """A POST search with a JSON body finds the document by id."""
        coll2 = self._new_coll2()
        coll2.add(self._sample_docs())
        coll2.commit()
        # JSON DSL Query format
        res = coll2.search({}, "POST", '{"query": "id:1"}').result
        self.assertEqual(len(res.response.docs), 1)
Пример #25
0
 def setUp(self):
     """Connect and gather optional collection-creation parameters.

     ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
     ``version``; a non-empty ``SOLR_CONFNAME`` becomes the
     ``collection_config_name`` entry of ``self.collparams``.
     """
     solr_version = os.getenv('SOLR_VERSION', '6.1.0')
     self.conn = SolrConnection(version=solr_version)
     config_name = os.getenv('SOLR_CONFNAME', '')
     if config_name == '':
         self.collparams = {}
     else:
         self.collparams = {'collection_config_name': config_name}
Пример #26
0
class TestCollectionAdmin(unittest.TestCase):
    """Exercises collection administration: reload, shards, aliases, replicas.

    The ``time.sleep(3)`` pauses are kept from the original tests.
    NOTE(review): presumably they give the cluster time to settle between
    admin calls - confirm before shortening them.
    """

    def setUp(self):
        """Connect and gather optional collection-creation parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
        ``version``; a non-empty ``SOLR_CONFNAME`` is forwarded to collection
        creation as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def _create_coll2(self, **params):
        """Create ``coll2`` and schedule its removal for the end of the test.

        ``params`` are passed ahead of ``self.collparams``. A cleanup
        replaces the trailing ``drop()`` so the collection is removed even
        when an earlier step of the test fails.
        """
        coll2 = self.conn.create_collection('coll2', **dict(params, **self.collparams))
        self.addCleanup(coll2.drop)
        return coll2

    def test_create_collection(self):
        """Creating and dropping collections changes the listed count."""
        # Drops are part of what is asserted here, so they stay inline.
        original_count = len(self.conn.list())
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count+1)
        self.conn.list()
        time.sleep(3)
        coll3 = self.conn.create_collection('coll3', **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count+2)
        # todo calling state here means the integration works, but what should we assert?
        coll2.state
        coll2.drop()
        self.assertEqual(len(self.conn.list()), original_count+1)
        time.sleep(3)
        coll3.drop()
        self.assertEqual(len(self.conn.list()), original_count)

    def test_reload(self):
        """``reload`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.reload()
        self.assertIsNotNone(res.success)

    def test_split_shard(self):
        """``split_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2()
        time.sleep(3)
        res = coll2.split_shard('shard1', ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_shard(self):
        """``create_shard`` returns a response with a ``success`` entry."""
        coll2 = self._create_coll2(router_name='implicit',
                                   shards='myshard1', max_shards_per_node=3)
        time.sleep(3)
        res = coll2.create_shard('shard_my')
        time.sleep(3)
        self.assertIsNotNone(res.success)

    def test_create_delete_alias(self):
        """An alias created for a collection is reported as an alias."""
        coll2 = self._create_coll2()
        coll2.create_alias('alias2')
        # Cleanups run last-in-first-out: the alias is deleted before the
        # collection is dropped, matching the original order.
        self.addCleanup(coll2.delete_alias, 'alias2')
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())

    def test_delete_replica(self):
        """Deleting the first replica of ``myshard1`` reports success."""
        try:
            coll2 = self.conn.create_collection('test_delete_replica',
                                                router_name='implicit',
                                                shards='myshard1',
                                                max_shards_per_node=6,
                                                replication_factor=2,
                                                **self.collparams)
        except ReadTimeout:
            print("Encountered read timeout while testing delete replicate")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            coll2 = self.conn['test_delete_replica']
        # Dropped through a cleanup so a failed replica lookup or assertion
        # does not leave the collection behind.
        self.addCleanup(coll2.drop)
        time.sleep(3)
        firstReplica = list(coll2.shards['shards']['myshard1']['replicas'].dict.keys())[0]
        result = coll2.delete_replica(firstReplica, 'myshard1')
        self.assertTrue(result.success)
Пример #27
0
 def setUp(self):
     """Connect and gather optional collection-creation parameters.

     ``SOLR_VERSION`` (default ``6.1.0``) is passed as the connection's
     ``version``; a non-empty ``SOLR_CONFNAME`` becomes the
     ``collection_config_name`` entry of ``self.collparams``.
     """
     solr_version = os.getenv("SOLR_VERSION", "6.1.0")
     self.conn = SolrConnection(version=solr_version)
     config_name = os.getenv("SOLR_CONFNAME", "")
     if config_name == "":
         self.collparams = {}
     else:
         self.collparams = {"collection_config_name": config_name}
Пример #28
0
class SolrBackend(object):
    """Mirrors rows of a database table into a Solr core.

    The ``after_*`` / ``before_*`` methods add, replace and delete Solr
    documents keyed by row id.
    NOTE(review): the method names follow the web2py DAL callback
    convention; presumably they are registered on the table's callback
    lists - confirm against the caller.
    """

    def __init__(self, table, core="collection1"):
        """Remember the table and bind ``self.interface`` to a Solr core.

        :param table: table object; its ``_id`` field is used to select ids
        :param core: name looked up on the Solr connection
        :raises RuntimeError: when connecting or looking up the core fails
        """
        self.table = table
        self.core = core
        self.url = 'localhost:8983'
        # No indexed fields until indexes() is called, so the callbacks no
        # longer hit AttributeError when they fire first.
        self.fieldnames = ()
        # Ids captured by before_delete() for the matching after_delete().
        self.ids = None
        try:
            self.interface = SolrConnection(self.url)[self.core]
        except Exception as e:
            message = "Cannot connect to Solr: %s" % e
            logger.warning(message)
            raise RuntimeError(message)

    def get_ids(self, queryset):
        """Return the ids of all rows selected by ``queryset``."""
        return [r.id for r in queryset.select(self.table._id)]

    def indexes(self, *fieldnames):
        """Declare which field names are copied into Solr documents."""
        self.fieldnames = fieldnames

    def after_insert(self, fields, id):
        """Add a document for a newly inserted row.

        :param fields: mapping of field name to value for the new row
        :param id: id of the new row
        :return: ``True``
        """
        document = {'id': id}
        for name in self.fieldnames:
            if name in fields:
                document[name] = str(fields[name])
        self.interface.add([document])
        self.interface.commit()
        return True

    def after_update(self, queryset, fields):
        """ caveat, this should work but only if ALL indexed fields are updated at once """
        ids = self.get_ids(queryset)
        documents = []
        for id in ids:
            # Replace the stored document: delete it, then re-add it below.
            self.interface.delete({'q':'id:%i'%id})
            document = {'id':id}
            for name in self.fieldnames:
                if name in fields:
                    document[name] = str(fields[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def index_table(self, query, db):
        """Re-index every row matched by ``query``.

        Existing documents for the rows are deleted and committed, then
        fresh documents holding all indexed fields are added.

        :return: ``True``
        """
        # Select once and reuse the rows for both passes; previously the
        # same query was executed twice.
        rows = list(db(query).select())
        for row in rows:
            self.interface.delete({'q':'id:%i'%row.id})
        self.interface.commit()

        documents = []
        for row in rows:
            document = {'id':row.id}
            for name in self.fieldnames:
                document[name] = str(row[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def update(self, query, fields, db, **core_fields):
        """Re-add documents built from the selected ``fields`` of each row.

        A row key is copied into the document when any name in
        ``core_fields`` is a substring of it; when that key is ``'id'`` the
        existing Solr document is deleted first.

        :return: ``True``
        """
        rows = db(query).select(*fields)
        documents = []
        for row in rows:
            document = {}
            for key in row.keys():
                # One match is enough; this also avoids issuing the same
                # delete once per matching core field.
                if not any(core_field in key for core_field in core_fields):
                    continue
                document[key] = str(row[key])
                if key == 'id':
                    self.interface.delete({'q':'id:%i'%row[key]})
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def before_delete(self, queryset):
        """Capture the ids about to be deleted; returns ``False``."""
        self.ids = self.get_ids(queryset)
        return False

    def after_delete(self, queryset):
        """Delete the Solr documents of the rows that were removed.

        Uses the ids captured by ``before_delete`` when available.
        Re-selecting from ``queryset`` at this point (as was done before)
        would find nothing once the rows are gone, so nothing was removed
        from Solr. Without captured ids the queryset is still consulted.

        :return: ``True``
        """
        ids = getattr(self, 'ids', None)
        if ids is None:
            ids = self.get_ids(queryset)
        # Consume the captured ids so a later call cannot reuse stale ones.
        self.ids = None
        for id in ids:
            self.interface.delete({'q':'id:%i'%id})
        self.interface.commit()
        return True

    def meta_search(self, limit, offset, mode, compact, sort, **fieldkeys):
        """Search Solr for documents matching ``fieldkeys``.

        :param limit: value passed to ``rows``
        :param offset: value passed to ``start``
        :param mode: text inserted between the ``field:value`` clauses
        :param compact: when truthy, return only the matching ids
        :param sort: value passed to ``sort``
        :param fieldkeys: field name to search value
        :return: list of ids when ``compact``, else the ``response`` entry
        """
        query = ''
        total = len(fieldkeys)
        # Convert to solrcloudpy search
        for position, fieldkey in enumerate(fieldkeys, 1):
            query += " %s:%s " % (fieldkey, fieldkeys[fieldkey])
            if position < total:
                query += mode

        se = SearchOptions()
        se.commonparams.q(query).rows(limit).sort(sort).start(offset)
        response = self.interface.search(se)
        if compact:
            return [r['id'] for r in response.result['response'].docs]
        return response.result['response']
Пример #29
0
from solrcloudpy import SolrConnection
import os

# Drop every collection reported by the two local Solr nodes.
connection = SolrConnection(["localhost:8983", "localhost:7574"], version=os.getenv("SOLR_VERSION", "5.3.2"))
for collection_name in connection.list():
    # Parenthesised single-argument print works on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("Dropping %s" % collection_name)
    connection[collection_name].drop()
Пример #30
0
class TestCollectionAdmin(unittest.TestCase):
    """Collection administration tests run through a ``SolrConnection``."""

    def setUp(self):
        """Open a connection and collect optional collection parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) selects the version passed to the
        connection; a non-empty ``SOLR_CONFNAME`` is forwarded to every
        ``create_collection`` call as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv("SOLR_VERSION", "6.1.0"))
        self.collparams = {}
        confname = os.getenv("SOLR_CONFNAME", "")
        if confname != "":
            self.collparams["collection_config_name"] = confname

    def test_create_collection(self):
        """Creating and dropping collections changes the listed count."""
        original_count = len(self.conn.list())
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 1)
        self.conn.list()
        time.sleep(3)
        coll3 = self.conn.create_collection("coll3", **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 2)
        # todo calling state here means the integration works, but what should we assert?
        coll2.state
        coll2.drop()
        self.assertEqual(len(self.conn.list()), original_count + 1)
        time.sleep(3)
        coll3.drop()
        self.assertEqual(len(self.conn.list()), original_count)

    def test_reload(self):
        """``reload`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.reload()
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_split_shard(self):
        """``split_shard`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        res = coll2.split_shard("shard1", ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_shard(self):
        """``create_shard`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection(
            "coll2", router_name="implicit", shards="myshard1", max_shards_per_node=3, **self.collparams
        )
        time.sleep(3)
        res = coll2.create_shard("shard_my")
        time.sleep(3)
        self.assertTrue(getattr(res, "success") is not None)
        coll2.drop()

    def test_create_delete_alias(self):
        """An alias created for a collection reports ``is_alias()``."""
        coll2 = self.conn.create_collection("coll2", **self.collparams)
        coll2.create_alias("alias2")
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())
        coll2.delete_alias("alias2")
        coll2.drop()

    def test_delete_replica(self):
        """A replica can be deleted from a two-replica collection."""
        try:
            coll2 = self.conn.create_collection(
                "test_delete_replica",
                router_name="implicit",
                shards="myshard1",
                max_shards_per_node=6,
                replication_factor=2,
                **self.collparams
            )
        except ReadTimeout:
            # print() with one argument behaves the same on Python 2 and 3;
            # the print statement used before is a SyntaxError on Python 3.
            print("Encountered read timeout while testing delete replicate")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            # fall back to looking the collection up by name
            coll2 = self.conn["test_delete_replica"]
        time.sleep(3)
        coll2.delete_replica("core_node2", "myshard1")
        coll2.drop()
Пример #31
0
class SolrBackend(object):
    """Mirror rows of a database table into a Solr core and search them.

    The ``after_*`` / ``before_*`` methods add, replace and delete documents
    through ``self.interface``; ``indexes`` selects which field names are
    copied into the documents.
    """

    # Text type used to stringify field values: ``unicode`` on Python 2,
    # ``str`` on Python 3 (where the ``unicode`` builtin does not exist).
    try:
        _text = unicode
    except NameError:
        _text = str

    def __init__(self, table, core="collection1"):
        """Connect to the core ``core`` on ``localhost:8983``.

        :param table: table object; its ``_id`` attribute is used by ``get_ids``
        :param core: name used to index the ``SolrConnection``
        :raises RuntimeError: when the connection cannot be set up
        """
        self.table = table
        self.core = core
        self.url = 'localhost:8983'
        try:
            self.interface = SolrConnection(self.url)[self.core]
        except Exception as e:
            logger.warning("Cannot connect to Solr: %s" % e)
            raise RuntimeError("Cannot connect to Solr: %s" % e)

    def get_ids(self, queryset):
        """Return the ``id`` of every row selected from ``queryset``."""
        return [r.id for r in queryset.select(self.table._id)]

    def indexes(self, *fieldnames):
        """Set the field names copied into indexed documents."""
        self.fieldnames = fieldnames

    def after_insert(self, fields, id):
        """Index a newly inserted record and commit.

        :param fields: mapping of field name -> value for the new record
        :param id: id stored under the document's ``'id'`` key
        :return: always ``True``
        """
        document = [{'id': id}]
        for name in self.fieldnames:
            if name in fields:
                document[0][name] = self._text(fields[name])
        self.interface.add(document)
        self.interface.commit()
        return True

    def after_update(self, queryset, fields):
        """ caveat, this should work but only if ALL indexed fields are updated at once """
        documents = []
        for doc_id in self.get_ids(queryset):
            # replace the whole document: delete it, then add the new values
            self.interface.delete(doc_id)
            document = {'id': doc_id}
            for name in self.fieldnames:
                if name in fields:
                    document[name] = self._text(fields[name])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def update(self, query, fields, db, **core_fields):
        """Re-index the rows selected by ``query``.

        Each row key is expected to hold a container of column values.  For
        every ``core_fields`` name found in that container, its text value is
        stored in the document under ``core_fields[name]``; for the name
        ``'id'`` the existing entry is deleted first.

        :param query: query handed to ``db``
        :param fields: fields passed on to ``select``
        :param db: callable returning an object with a ``select`` method
        :param core_fields: column name -> document key
        :return: always ``True``
        """
        rows = db(query).select(*fields)
        documents = []
        for row in rows:
            document = {}
            for key in row.keys():
                for core_field in core_fields:
                    if core_field in row[key]:
                        document[core_fields[core_field]] = self._text(row[key][core_field])
                        if core_field == 'id':
                            self.interface.delete(row[key][core_field])
            documents.append(document)
        self.interface.add(documents)
        self.interface.commit()
        return True

    def before_delete(self, queryset):
        """Remember the ids matched by ``queryset`` for ``after_delete``.

        :return: always ``False``
        """
        self.ids = self.get_ids(queryset)
        return False

    def after_delete(self):
        """Delete the ids stored by ``before_delete`` and commit.

        :return: always ``True``
        """
        for doc_id in self.ids:
            self.interface.delete(id=doc_id)
        self.interface.commit()
        return True

    def meta_search(self, limit, offset, mode, compact, sort, **fieldkeys):
        """Search the index for ``field:value`` clauses joined by ``mode``.

        :param limit: value passed to ``rows``
        :param offset: value passed to ``start``
        :param mode: text inserted between consecutive clauses
        :param compact: when truthy, return only the ``id`` of every hit
        :param sort: value passed to ``sort``
        :param fieldkeys: field name -> value pairs, one clause each
        :return: list of ids when ``compact`` is truthy, otherwise the
            ``'response'`` entry of the search result
        """
        query = ''
        items = len(fieldkeys)
        # Convert to solrcloudpy search
        for count, fieldkey in enumerate(fieldkeys, 1):
            query += " %s:%s " % (fieldkey, fieldkeys[fieldkey])
            # the joiner goes between clauses, never after the last one
            if count < items:
                query += mode

        se = SearchOptions()
        se.commonparams.q(query).rows(limit).sort(sort).start(offset)
        # print() with one argument behaves the same on Python 2 and 3
        print(se)
        response = self.interface.search(se)
        if compact:
            return [r['id'] for r in response.result['response'].docs]
        return response.result['response']
Пример #32
0
 def setUp(self):
     """Create a new ``Connection`` and keep it on ``self.conn``."""
     fresh_connection = Connection()
     self.conn = fresh_connection
Пример #33
0
 def setUp(self):
     """Create a new ``Connection`` and keep it on ``self.conn``."""
     fresh_connection = Connection()
     self.conn = fresh_connection
Пример #34
0
 def __init__(self):
     """Open the default collection and start with an empty cache list.

     ``SOLR_SERVERS`` is passed to ``SolrConnection`` and the resulting
     connection is indexed with ``SOLR_COLLECTION_DEFAULT``.
     """
     connection = SolrConnection(SOLR_SERVERS)
     self.cnn = connection[SOLR_COLLECTION_DEFAULT]
     self.cache_list = list()
Пример #35
0
 def _init_connection(self):
     """Build ``self.conn`` from ``self.nodes`` and ``self.version``.

     The connection is created with ``timeout=6000``.
     """
     nodes = self.nodes
     options = {"version": self.version, "timeout": 6000}
     self.conn = SolrConnection(nodes, **options)
Пример #36
0
 def __init__(self):
     """Open the default collection and start with an empty cache list.

     ``SOLR_SERVERS`` is passed to ``SolrConnection`` and the resulting
     connection is indexed with ``SOLR_COLLECTION_DEFAULT``.
     """
     connection = SolrConnection(SOLR_SERVERS)
     self.cnn = connection[SOLR_COLLECTION_DEFAULT]
     self.cache_list = list()
Пример #37
0
 def setUp(self):
     """Open a connection and collect optional collection parameters.

     ``SOLR_VERSION`` (default ``6.1.0``) is passed to the connection; a
     non-empty ``SOLR_CONFNAME`` is stored in ``self.collparams`` as
     ``collection_config_name``.
     """
     solr_version = os.getenv("SOLR_VERSION", "6.1.0")
     self.conn = SolrConnection(version=solr_version)
     config_name = os.getenv("SOLR_CONFNAME", "")
     if config_name:
         self.collparams = {"collection_config_name": config_name}
     else:
         self.collparams = {}
Пример #38
0
class TestCollectionSearch(unittest.TestCase):
    """Add / delete / search tests run against a ``coll2`` collection."""

    def setUp(self):
        """Open a connection and collect optional collection parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) is passed to the connection; a
        non-empty ``SOLR_CONFNAME`` is forwarded to every
        ``create_collection`` call as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def test_add(self):
        """Documents that were added and committed can be found by id."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        coll2.add(docs)
        coll2.commit()
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 1)
        coll2.drop()

    def test_delete(self):
        """``delete`` accepts both a ``SearchOptions`` object and a dict."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        coll2.add(docs)
        coll2.commit()

        # delete w/ object
        so = SearchOptions()
        so.commonparams.q("id:1")
        coll2.delete(so)
        res = coll2.search({"q": "id:1"}).result
        self.assertTrue(len(res.response.docs) == 0)

        # delete w/ dict
        so = {"q": "id:2"}
        coll2.delete(so)
        res = coll2.search({"q": "id:2"}).result
        self.assertTrue(len(res.response.docs) == 0)

        coll2.drop()

    def test_custom_params_search(self):
        """Extra parameters are accepted by both ``add`` and ``search``."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        res_1 = coll2.add(docs, {'omitHeader': "false"})
        self.assertEqual(0, res_1.responseHeader.status)

        coll2.commit()
        res_2 = coll2.search({"q": "id:1", "omitHeader": "false"}).result
        self.assertEqual(0, res_2.responseHeader.status)
        # Drop the collection like every sibling test does, so 'coll2' is not
        # left behind for the tests that create it again.
        coll2.drop()

    def test_post_body_search(self):
        """A JSON query sent as a POST body finds the document."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        docs = [{"id": str(_id), "includes": "silly text"} for _id in range(5)]

        coll2.add(docs)
        coll2.commit()
        # JSON DSL Query format
        res = coll2.search({}, "POST", '{"query": "id:1"}').result
        self.assertTrue(len(res.response.docs) == 1)
        coll2.drop()
Пример #39
0
 def setUp(self):
     """Create a ``SolrConnection`` with default arguments on ``self.conn``."""
     default_connection = SolrConnection()
     self.conn = default_connection
Пример #40
0
from solrcloudpy import SolrConnection
import os

# Drop every collection reported by the two local Solr nodes.
connection = SolrConnection(['localhost:8983', 'localhost:7574'],
                            version=os.getenv('SOLR_VERSION', '5.3.2'))
for collection_name in connection.list():
    # Parenthesised single-argument print works on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("Dropping %s" % collection_name)
    connection[collection_name].drop()
Пример #41
0
 def setUp(self):
     """Open a connection and collect optional collection parameters.

     ``SOLR_VERSION`` (default ``6.1.0``) is passed to the connection; a
     non-empty ``SOLR_CONFNAME`` is stored in ``self.collparams`` as
     ``collection_config_name``.
     """
     solr_version = os.getenv('SOLR_VERSION', '6.1.0')
     self.conn = SolrConnection(version=solr_version)
     config_name = os.getenv('SOLR_CONFNAME', '')
     if config_name:
         self.collparams = {'collection_config_name': config_name}
     else:
         self.collparams = {}
Пример #42
0
class TestCollectionAdmin(unittest.TestCase):
    """Collection administration tests run through a ``SolrConnection``."""

    def setUp(self):
        """Open a connection and collect optional collection parameters.

        ``SOLR_VERSION`` (default ``6.1.0``) selects the version passed to the
        connection; a non-empty ``SOLR_CONFNAME`` is forwarded to every
        ``create_collection`` call as ``collection_config_name``.
        """
        self.conn = SolrConnection(version=os.getenv('SOLR_VERSION', '6.1.0'))
        self.collparams = {}
        confname = os.getenv('SOLR_CONFNAME', '')
        if confname != '':
            self.collparams['collection_config_name'] = confname

    def test_create_collection(self):
        """Creating and dropping collections changes the listed count."""
        original_count = len(self.conn.list())
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 1)
        self.conn.list()
        time.sleep(3)
        coll3 = self.conn.create_collection('coll3', **self.collparams)
        self.assertEqual(len(self.conn.list()), original_count + 2)
        # todo calling state here means the integration works, but what should we assert?
        coll2.state
        coll2.drop()
        self.assertEqual(len(self.conn.list()), original_count + 1)
        time.sleep(3)
        coll3.drop()
        self.assertEqual(len(self.conn.list()), original_count)

    def test_reload(self):
        """``reload`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        time.sleep(3)
        res = coll2.reload()
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_split_shard(self):
        """``split_shard`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        time.sleep(3)
        res = coll2.split_shard('shard1',
                                ranges="80000000-90000000,90000001-7fffffff")
        time.sleep(3)
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_create_shard(self):
        """``create_shard`` returns a result whose ``success`` is not ``None``."""
        coll2 = self.conn.create_collection('coll2',
                                            router_name='implicit',
                                            shards='myshard1',
                                            max_shards_per_node=3,
                                            **self.collparams)
        time.sleep(3)
        res = coll2.create_shard('shard_my')
        time.sleep(3)
        self.assertTrue(getattr(res, 'success') is not None)
        coll2.drop()

    def test_create_delete_alias(self):
        """An alias created for a collection reports ``is_alias()``."""
        coll2 = self.conn.create_collection('coll2', **self.collparams)
        coll2.create_alias('alias2')
        time.sleep(3)
        self.assertTrue(self.conn.alias2.is_alias())
        coll2.delete_alias('alias2')
        coll2.drop()

    def test_delete_replica(self):
        """A replica can be deleted from a two-replica collection."""
        try:
            coll2 = self.conn.create_collection('test_delete_replica',
                                                router_name='implicit',
                                                shards='myshard1',
                                                max_shards_per_node=6,
                                                replication_factor=2,
                                                **self.collparams)
        except ReadTimeout:
            # print() with one argument behaves the same on Python 2 and 3;
            # the print statement used before is a SyntaxError on Python 3.
            print("Encountered read timeout while testing delete replicate")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            # fall back to looking the collection up by name
            coll2 = self.conn['test_delete_replica']
        time.sleep(3)
        coll2.delete_replica('core_node2', 'myshard1')
        coll2.drop()
Пример #43
0
class QueueSolr(object):
    """
    SolrDB connection module.
    """
    def __init__(self, table, server=None, **kwargs):
        """
        :param server: list or str
        :param table: collection
        :param kwargs:  detect_live_nodes=False,
                         user=None,
                         password=None,
                         timeout=10,
                         webappdir='solr'
        :return:
        """
        # 'host' and 'port' are removed before the remaining options are
        # forwarded to SolrConnection.
        kwargs.pop('host', None)
        kwargs.pop('port', None)
        self.conn = SolrConnection(server=server, **kwargs)
        self.table = table
        self.collection = self.conn[table]

    @staticmethod
    def _merge_criteria(args, kwargs):
        """Fold every dict found in ``args`` into ``kwargs`` and return it."""
        for dict_ in args:
            if isinstance(dict_, dict):
                kwargs.update(dict_)
        return kwargs

    @staticmethod
    def _build_query(criteria):
        """Render ``criteria`` as ``field:value`` clauses joined by `` AND ``.

        One formatting path covers zero, one and many criteria; it also
        accepts non-string values and does not rely on concatenating
        ``keys()`` and ``values()``, which fails on Python 3.
        """
        return ' AND '.join(['%s:%s' % (k, v) for k, v in criteria.items()])

    @QueueBase.catch
    def collections(self):
        """
        Get the list of all collections.
        :return:
        """
        return self.conn.list()

    @QueueBase.catch
    def find(self, *args, **kwargs):
        """
        Look up documents in the collection by field values.
        The fields being searched must be indexed,
        otherwise a 400 error is reported.
        :param args: dicts of field -> value, merged into kwargs
        :param kwargs: field -> value criteria
        :return: the 'response' entry of the search result
        """
        criteria = self._merge_criteria(args, kwargs)
        q_item = {'q': self._build_query(criteria)}
        #q_item=SearchOptions().commonparams.q(valuess)
        return self.collection.search(q_item).result['response']

    @QueueBase.catch
    def update(self, *args, **kwargs):
        """
        Update data.
        The fields being updated must exist in the original table;
        any *_temp field is accepted.
        :param args: dicts of field -> value, merged into kwargs
        :param kwargs: field -> value pairs of the document to store
        :return:
        """
        document = self._merge_criteria(args, kwargs)
        self.collection.add([document])
        log.info(u"%s Storage success!" % json.dumps(document))

    @QueueBase.catch
    def delete(self, *args, **kwargs):
        """
        Delete data.
        The fields used for deletion must also exist in the original table,
        otherwise: 400 Client Error: Bad Request
        :param args: dicts of field -> value, merged into kwargs
        :param kwargs: field -> value criteria
        :return:
        """
        criteria = self._merge_criteria(args, kwargs)
        q_item = {'q': self._build_query(criteria)}
        # NOTE(review): the delete is issued with commit=False and no commit
        # follows here -- confirm a commit happens elsewhere.
        self.collection.delete(q_item, commit=False)
        log.info(u"%s deleted!" % json.dumps(criteria))
Пример #44
0
class TestCollectionAdmin(unittest.TestCase):
    """Collection administration tests run through a ``SolrConnection``."""

    def setUp(self):
        """Open a connection and collect optional collection parameters."""
        solr_version = os.getenv("SOLR_VERSION", "6.1.0")
        self.conn = SolrConnection(version=solr_version)
        config_name = os.getenv("SOLR_CONFNAME", "")
        if config_name:
            self.collparams = {"collection_config_name": config_name}
        else:
            self.collparams = {}

    def test_create_collection(self):
        """Creating and dropping collections changes the listed count."""
        baseline = len(self.conn.list())

        first = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), baseline + 1)
        self.conn.list()
        time.sleep(3)

        second = self.conn.create_collection("coll3", **self.collparams)
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), baseline + 2)

        # todo calling state here means the integration works, but what should we assert?
        first.state
        first.drop()
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), baseline + 1)
        time.sleep(3)

        second.drop()
        time.sleep(3)
        self.assertEqual(len(self.conn.list()), baseline)

    def test_reload(self):
        """``reload`` returns a result whose ``success`` is not ``None``."""
        collection = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        outcome = collection.reload()
        self.assertTrue(outcome.success is not None)
        collection.drop()

    def test_split_shard(self):
        """``split_shard`` returns a result whose ``success`` is not ``None``."""
        collection = self.conn.create_collection("coll2", **self.collparams)
        time.sleep(3)
        hash_ranges = "80000000-90000000,90000001-7fffffff"
        outcome = collection.split_shard("shard1", ranges=hash_ranges)
        time.sleep(3)
        self.assertTrue(outcome.success is not None)
        collection.drop()

    def test_create_shard(self):
        """``create_shard`` returns a result whose ``success`` is not ``None``."""
        collection = self.conn.create_collection(
            "coll2",
            router_name="implicit",
            shards="myshard1",
            max_shards_per_node=3,
            **self.collparams
        )
        time.sleep(3)
        outcome = collection.create_shard("shard_my")
        time.sleep(3)
        self.assertTrue(outcome.success is not None)
        collection.drop()

    def test_create_delete_alias(self):
        """An alias created for a collection reports ``is_alias()``."""
        collection = self.conn.create_collection("coll2", **self.collparams)
        collection.create_alias("alias2")
        time.sleep(3)
        alias_flag = self.conn.alias2.is_alias()
        self.assertTrue(alias_flag)
        collection.delete_alias("alias2")
        collection.drop()

    def test_delete_replica(self):
        """The first replica listed for ``myshard1`` can be deleted."""
        try:
            collection = self.conn.create_collection(
                "test_delete_replica",
                router_name="implicit",
                shards="myshard1",
                max_shards_per_node=6,
                replication_factor=2,
                **self.collparams
            )
        except ReadTimeout:
            print("Encountered read timeout while testing delete replicate")
            print("This generally doesn't mean the collection wasn't created with the settings passed.")
            # fall back to looking the collection up by name
            collection = self.conn["test_delete_replica"]
        time.sleep(3)
        # replica names are read from the shard listing instead of being hard-coded
        replicas = collection.shards["shards"]["myshard1"]["replicas"]
        replica_names = list(replicas.dict.keys())
        first_replica = replica_names[0]
        outcome = collection.delete_replica(first_replica, "myshard1")
        self.assertTrue(outcome.success)
        collection.drop()
Пример #45
0
 def setUp(self):
     """Create a ``SolrConnection`` with default arguments on ``self.conn``."""
     default_connection = SolrConnection()
     self.conn = default_connection
Пример #46
0
import os

from solrcloudpy import SolrConnection

# Drop every collection reported by the two local Solr nodes.
connection = SolrConnection(
    ["localhost:8983", "localhost:7574"],
    version=os.getenv("SOLR_VERSION", "7.7.0"),
)
for collection_name in connection.list():
    message = "Dropping %s" % collection_name
    print(message)
    connection[collection_name].drop()