def test_no_nodes(self):
    """Ensure the query plan for an empty cluster is generated without errors."""
    empty_policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
    empty_policy.populate(None, [])

    plan = list(empty_policy.make_query_plan())
    self.assertEqual(plan, [])
def test_no_remote(self):
    """With every host in the local DC, the query plan must contain all of them."""
    local_hosts = [Host(idx, SimpleConvictionPolicy) for idx in range(4)]
    for local_host in local_hosts:
        local_host.set_location_info("dc1", "rack1")

    dc_policy = DCAwareRoundRobinPolicy("dc1")
    dc_policy.populate(None, local_hosts)

    plan = list(dc_policy.make_query_plan())
    self.assertEqual(sorted(plan), sorted(local_hosts))
def setup_tables(self):
    """Set up the cqlengine connection and sync the message and attachment tables.

    The key space and a comma-separated host list are read from the storage
    section of the environment config. Two execution profiles are registered
    (the default one and ``"batch"``). A plain-text auth provider is added
    only when a password is configured.
    """
    storage = ConfigKeys.STORAGE
    key_space = self.env.config.get(ConfigKeys.KEY_SPACE, domain=storage)
    hosts = self.env.config.get(ConfigKeys.HOST, domain=storage).split(",")

    # override the default so we can set consistency level later
    default_profile = ExecutionProfile(
        load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
        retry_policy=RetryPolicy(),
        request_timeout=10.0,
        row_factory=Session._row_factory,  # noqa
        # should probably be changed to QUORUM when having more than 3 nodes in the cluster
        consistency_level=ConsistencyLevel.LOCAL_ONE,
    )

    # TODO: there doesn't seem to be a way to specify execution profile when
    #  using the library's object mapping approach, only when writing pure
    #  cql queries:
    #  https://docs.datastax.com/en/developer/python-driver/3.24/execution_profiles/
    # batch profile has longer timeout since they are run async anyway
    batch_profile = ExecutionProfile(
        load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
        request_timeout=120.0,
        consistency_level=ConsistencyLevel.LOCAL_ONE,
    )

    setup_kwargs = {
        "default_keyspace": key_space,
        "protocol_version": 3,
        "retry_connect": True,
        # required to specify execution profiles in future versions
        "execution_profiles": {
            EXEC_PROFILE_DEFAULT: default_profile,
            "batch": batch_profile,
        },
    }

    username = self._get_from_conf(ConfigKeys.USER, storage)
    password = self._get_from_conf(ConfigKeys.PASSWORD, storage)
    if password is not None:
        setup_kwargs["auth_provider"] = PlainTextAuthProvider(
            username=username,
            password=password,
        )

    connection.setup(hosts, **setup_kwargs)

    for model in (MessageModel, AttachmentModel):
        sync_table(model)
def test_DC_aware_round_robin_policy(self):
    """Check insights serialization of DCAwareRoundRobinPolicy, default and explicit options."""
    cases = (
        (
            DCAwareRoundRobinPolicy(),
            {'local_dc': '', 'used_hosts_per_remote_dc': 0},
        ),
        (
            DCAwareRoundRobinPolicy(local_dc='fake_local_dc', used_hosts_per_remote_dc=15),
            {'local_dc': 'fake_local_dc', 'used_hosts_per_remote_dc': 15},
        ),
    )
    for policy, expected_options in cases:
        self.assertEqual(
            insights_registry.serialize(policy),
            {'namespace': 'cassandra.policies',
             'options': expected_options,
             'type': 'DCAwareRoundRobinPolicy'}
        )
def init(self, config):
    """Read the packaged ``domsconfig.ini`` and create the DOMS keyspace and tables.

    The ``config`` argument is accepted but not used by this implementation.
    """
    logger = logging.getLogger(__name__)
    logger.info("*** STARTING DOMS INITIALIZATION ***")

    parser = ConfigParser.RawConfigParser()
    parser.readfp(pkg_resources.resource_stream(__name__, "domsconfig.ini"), filename='domsconfig.ini')

    hosts_csv = parser.get("cassandra", "host")
    keyspace = parser.get("cassandra", "keyspace")
    datacenter = parser.get("cassandra", "local_datacenter")
    protocol_version = int(parser.get("cassandra", "protocol_version"))

    # Log the effective Cassandra settings, one line per setting.
    for template, value in (
        ("Cassandra Host(s): %s", hosts_csv),
        ("Cassandra Keyspace: %s", keyspace),
        ("Cassandra Datacenter: %s", datacenter),
        ("Cassandra Protocol Version: %s", protocol_version),
    ):
        logger.info(template % (value))

    lb_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy(datacenter))

    # The cluster is used as a context manager so it is closed after setup.
    with Cluster(hosts_csv.split(','),
                 load_balancing_policy=lb_policy,
                 protocol_version=protocol_version) as cluster:
        session = cluster.connect()
        self.createKeyspace(session, keyspace)
        self.createTables(session)
def _production_connect(self):
    """Set up the cqlengine connection; assumes a multiple node Cassandra cluster."""
    settings = {
        "hosts": self.hosts,
        "default_keyspace": self.keyspace,
        "consistency": CL.LOCAL_QUORUM,
        "port": self.port,
        "cql_version": self.cql_version,
        "lazy_connect": True,
        "retry_connect": False,
        "compression": True,
        "auth_provider": None,
        "load_balancing_policy": DCAwareRoundRobinPolicy(
            local_dc='dc1', used_hosts_per_remote_dc=0),
        "protocol_version": 4,
        "executor_threads": 2,
        "reconnection_policy": None,
        "default_retry_policy": None,
        "conviction_policy_factory": None,
        "metrics_enabled": False,
        "connection_class": None,
        "ssl_options": None,
        "sockopts": None,
        "max_schema_agreement_wait": 10,
        "control_connection_timeout": 2.0,
        "idle_heartbeat_interval": 30,
        "schema_event_refresh_window": 2,
        "topology_event_refresh_window": 10,
        "connect_timeout": self.connect_timeout,
    }
    connection.setup(**settings)
async def connect(cls, contact_points=None, keyspace=None, port=None, username=None, password=None):
    """
    Set connection.

    :param contact_points: list of contact points [cp1, cp2, ..., cpN]
    :param keyspace: keyspace name
    :param port: port
    :param username: username
    :param password: password
    :raises Exception: with the message ``"Connection failed: <reason>"`` when
        parameter checking or connecting fails; the original error is attached
        as ``__cause__``.
    """
    cls.logger.debug("Setting connection")
    cls._set_connection_parameters(contact_points, keyspace, port, username, password)
    try:
        cls._check_connection_parameters()
        cls.auth = PlainTextAuthProvider(username=cls.username, password=cls.password)
        lbp = DCAwareRoundRobinPolicy()
        cls.cluster = Cluster(cls.contact_points, auth_provider=cls.auth, port=cls.port,
                              load_balancing_policy=lbp)
        cls.session = cls.cluster.connect(cls.keyspace)
        cls.logger.debug("Connection set")
    except Exception as err:
        message = "Connection failed: " + str(err)
        cls.logger.error(message)
        # Chain explicitly so callers can inspect the underlying failure.
        raise Exception(message) from err
def conn(self, *, ssl_path: str = None) -> Session:  # type: ignore
    """Return the Cassandra session, establishing the connection on first use.

    :param ssl_path: optional CA certificate path; when given, SSL options are
        passed to the cluster.
    """
    if self._session:
        return self._session

    auth_provider = None
    if self.user is not None:
        auth_provider = PlainTextAuthProvider(username=self.user, password=self.password)

    ssl_opts = None
    if ssl_path is not None:
        ssl_opts = {
            "ca_certs": ssl_path,
            "ssl_version": PROTOCOL_TLSv1,
            "cert_reqs": CERT_REQUIRED,
        }

    default_profile = ExecutionProfile(
        load_balancing_policy=DCAwareRoundRobinPolicy(),
        consistency_level=ConsistencyLevel.LOCAL_QUORUM,
        row_factory=dict_factory,
    )
    cluster = Cluster(
        contact_points=self.host,
        auth_provider=auth_provider,
        ssl_options=ssl_opts,
        execution_profiles={EXEC_PROFILE_DEFAULT: default_profile},
    )
    self._session = cluster.connect(self.keyspace)
    return self._session
async def init(loop):
    """Create the web application backed by a Cassandra session.

    Loads ``config/config.yml``, connects to the configured Cassandra host,
    selects the keyspace, wraps the session with ``aiosession`` and registers
    the routes.

    :return: tuple ``(app, host, port)`` where host/port come from the ``api``
        section of the configuration.
    """
    # Load configuration file
    config = load_config(PROJECT_ROOT / 'config' / 'config.yml')
    cassandra_conf = config["cassandra"]

    # Connect to Cassandra cluster
    cluster = Cluster(
        [cassandra_conf["host"]],
        load_balancing_policy=DCAwareRoundRobinPolicy(),
        port=cassandra_conf["port"],
    )
    session = cluster.connect()
    session.set_keyspace(cassandra_conf["keyspace"])

    # Threaded Cassandra wrapper for asyncio
    aiosession(session)

    # Database store shared by the route handlers
    db_store = DbStore(session, config)

    # Server application and its routes
    app = web.Application(loop=loop)
    register_routes(app, RoutesHandler(db_store, config))

    api_conf = config["api"]
    return app, api_conf["host"], api_conf["port"]
def establish_connection(self):
    """
    Open a Cassandra session and switch it to the configured key space.

    :return: True when the session was created; False when any exception
        occurred (its text is stored in ``self.response["data"]`` and logged).
    """
    self._log.log("establishing connection with config : " + json.dumps(self.__repr__()))
    try:
        credentials = PlainTextAuthProvider(username=self._user_name, password=self._password)
        cluster = Cluster(
            contact_points=self._host,
            port=self._port,
            auth_provider=credentials,
            max_schema_agreement_wait=300,
            control_connection_timeout=10,
            connect_timeout=30,
            load_balancing_policy=DCAwareRoundRobinPolicy(),
        )
        self._session = cluster.connect()
        self._session.set_keyspace(self._key_space)
        self._log.log("connection established")
    except Exception as exe:
        self.response["data"] = str(exe)
        self._log.log("Invalid credentials or smthin went wrong", error=str(exe))
        return False
    else:
        return True
def test_get_distance(self):
    """
    Same scenario as DCAwareRoundRobinPolicyTest.test_get_distance(), run
    through a TokenAwarePolicy wrapper. A FakeCluster supplies the metadata
    and the child policy is reached via ``policy._child_policy``.
    """
    policy = TokenAwarePolicy(DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=0))

    local_host = Host("ip1", SimpleConvictionPolicy)
    local_host.set_location_info("dc1", "rack1")
    policy.populate(self.FakeCluster(), [local_host])
    self.assertEqual(policy.distance(local_host), HostDistance.LOCAL)

    # used_hosts_per_remote_dc is set to 0, so ignore it
    first_remote = Host("ip2", SimpleConvictionPolicy)
    first_remote.set_location_info("dc2", "rack1")
    self.assertEqual(policy.distance(first_remote), HostDistance.IGNORED)

    # dc2 isn't registered in the policy's live_hosts dict
    policy._child_policy.used_hosts_per_remote_dc = 1
    self.assertEqual(policy.distance(first_remote), HostDistance.IGNORED)

    # make sure the policy has both dcs registered
    policy.populate(self.FakeCluster(), [local_host, first_remote])
    self.assertEqual(policy.distance(first_remote), HostDistance.REMOTE)

    # since used_hosts_per_remote_dc is set to 1, only the first
    # remote host in dc2 will be REMOTE, the rest are IGNORED
    second_remote = Host("ip3", SimpleConvictionPolicy)
    second_remote.set_location_info("dc2", "rack1")
    policy.populate(self.FakeCluster(), [local_host, first_remote, second_remote])
    observed = {policy.distance(first_remote), policy.distance(second_remote)}
    self.assertEqual(observed, {HostDistance.REMOTE, HostDistance.IGNORED})
def connect_cassandra(self, keyspace):
    '''Connects cassandra using class parameters

    The password stored in ``self.pwd`` is base64-decoded before use. The
    cluster created here is shut down when the generator is closed or an
    error occurs.

    Args:
        keyspace: keyspace to select on the new session

    Yields:
        obj: yields class object
    '''
    # Track the cluster locally: if construction fails, cleanup must not touch
    # a missing or stale ``self.cluster`` and mask the original error.
    cluster = None
    try:
        auth_provider = PlainTextAuthProvider(
            username=self.user,
            password=base64.b64decode(self.pwd).decode())
        cluster = Cluster(
            contact_points=[self.ip_address],
            load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='datacenter1'),
            port=self.port,
            auth_provider=auth_provider,
            control_connection_timeout=100,
            protocol_version=3,
        )
        self.cluster = cluster
        self.logger.debug(self.cluster)
        self.session = self.cluster.connect()
        self.session.set_keyspace(keyspace)
        self.logger.info('Cassandra connection is established.')
        yield self
    finally:
        if cluster is not None and not cluster.is_shutdown:
            cluster.shutdown()
            self.logger.info('Cassandra connection is closed.')
def get_lb_policy(policy_name: str, policy_args: Dict[str, Any]) -> Policy:
    """
    Build the load balancing policy named by ``policy_name``.

    :param policy_name: Name of the policy to use.
    :param policy_args: Parameters for the policy.
    :return: The requested policy; ``RoundRobinPolicy`` for any other name.
    :raises Exception: for ``WhiteListRoundRobinPolicy`` without ``hosts``.
    """
    if policy_name == 'DCAwareRoundRobinPolicy':
        return DCAwareRoundRobinPolicy(
            policy_args.get('local_dc', ''),
            int(policy_args.get('used_hosts_per_remote_dc', 0)),
        )

    if policy_name == 'WhiteListRoundRobinPolicy':
        whitelist = policy_args.get('hosts')
        if whitelist:
            return WhiteListRoundRobinPolicy(whitelist)
        raise Exception('Hosts must be specified for WhiteListRoundRobinPolicy')

    if policy_name == 'TokenAwarePolicy':
        child_name = policy_args.get('child_load_balancing_policy', 'RoundRobinPolicy')
        child_args = policy_args.get('child_load_balancing_policy_args', {})
        supported_children = (
            'RoundRobinPolicy',
            'DCAwareRoundRobinPolicy',
            'WhiteListRoundRobinPolicy',
        )
        if child_name in supported_children:
            wrapped = CassandraHook.get_lb_policy(child_name, child_args)
        else:
            # Unsupported child names fall back to plain round robin.
            wrapped = RoundRobinPolicy()
        return TokenAwarePolicy(wrapped)

    # Fallback to default RoundRobinPolicy
    return RoundRobinPolicy()
def get_lb_policy(policy_name, policy_args):
    """Build the load balancing policy named by ``policy_name``.

    Unknown names and ``'RoundRobinPolicy'`` yield a ``RoundRobinPolicy``.
    ``'WhiteListRoundRobinPolicy'`` raises when no ``hosts`` are given.
    """
    known_policies = {
        'RoundRobinPolicy': RoundRobinPolicy,
        'DCAwareRoundRobinPolicy': DCAwareRoundRobinPolicy,
        'WhiteListRoundRobinPolicy': WhiteListRoundRobinPolicy,
        'TokenAwarePolicy': TokenAwarePolicy,
    }

    if not known_policies.get(policy_name) or policy_name == 'RoundRobinPolicy':
        return RoundRobinPolicy()

    if policy_name == 'DCAwareRoundRobinPolicy':
        return DCAwareRoundRobinPolicy(
            policy_args.get('local_dc', ''),
            int(policy_args.get('used_hosts_per_remote_dc', 0)),
        )

    if policy_name == 'WhiteListRoundRobinPolicy':
        whitelist = policy_args.get('hosts')
        if whitelist:
            return WhiteListRoundRobinPolicy(whitelist)
        raise Exception('Hosts must be specified for WhiteListRoundRobinPolicy')

    if policy_name == 'TokenAwarePolicy':
        child_name = policy_args.get('child_load_balancing_policy', 'RoundRobinPolicy')
        child_args = policy_args.get('child_load_balancing_policy_args', {})
        supported_children = ('RoundRobinPolicy', 'DCAwareRoundRobinPolicy', 'WhiteListRoundRobinPolicy',)
        if child_name in supported_children:
            wrapped = CassandraHook.get_lb_policy(child_name, child_args)
        else:
            # Unsupported child names fall back to plain round robin.
            wrapped = RoundRobinPolicy()
        return TokenAwarePolicy(wrapped)
def get_session():
    '''
    Connect to a Cassandra cluster with the driver.

    The host and local data center are read from the ``CASSANDRA_HOST`` and
    ``CASSANDRA_DC`` environment variables.

    :return: A Cassandra session whose rows are returned as pandas DataFrames.
    '''
    # cluster information comes from Docker-provided environment variables
    host = os.environ['CASSANDRA_HOST']
    local_dc = os.environ['CASSANDRA_DC']

    # the policy keeps traffic within the configured local data center
    lb_policy = DCAwareRoundRobinPolicy(local_dc=local_dc)
    cluster = Cluster([host], load_balancing_policy=lb_policy)

    # use the faster event loop provider
    cluster.connection_class = LibevConnection

    session = cluster.connect()

    def pandas_factory(colnames, rows):
        # Row factory: wrap the result rows in a DataFrame.
        return pd.DataFrame(rows, columns=colnames)

    session.row_factory = pandas_factory
    session.default_fetch_size = None
    return session
def main():
    """Recreate the ``jokes`` table and fill it with random rows.

    ``sys.argv[1]`` is the number of rows to insert. When ``sys.argv[2]`` is
    the string ``'True'``, the inserted ids are written to ``inserted_uuids``,
    one per line.
    """
    cluster = Cluster(
        ['127.0.0.1'],
        load_balancing_policy=DCAwareRoundRobinPolicy(),
        port=9042)
    session = cluster.connect()

    setup_statements = (
        'USE highload',
        'DROP TABLE IF EXISTS jokes;',
        " CREATE TABLE jokes ( id uuid PRIMARY KEY, text text, "
        "creation_timestamp timestamp, likes int, dislikes int ) ",
    )
    for statement in setup_statements:
        session.execute(statement)

    insert_template = (
        " INSERT INTO jokes (id, text, creation_timestamp, likes, dislikes) "
        "VALUES({}, '{}', toTimestamp(now()), 10, 20) IF NOT EXISTS; "
    )
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits

    inserted = []
    for _ in xrange(int(sys.argv[1])):
        # 200 random alphanumeric characters per joke text
        text = ''.join(random.choice(alphabet) for _ in xrange(200))
        uid = str(uuid.uuid1())
        session.execute(insert_template.format(uid, text))
        inserted.append(uid)

    wants_dump = len(sys.argv) >= 3 and sys.argv[2] == 'True'
    if wants_dump:
        with open('inserted_uuids', 'w') as out:
            out.writelines(uid + '\n' for uid in inserted)
def init():
    """Initialise the Cassandra and Elasticsearch clients once per process.

    Sets up the cqlengine connection, creates the keyspace and syncs the
    ``Article`` table, then creates the Elasticsearch index from
    ``article-mapping.json`` if it does not exist yet. Later calls return
    immediately because ``already_loaded`` is set at the end.
    """
    global already_loaded, _cql, _es
    if already_loaded:
        return

    connection.setup(
        ["localhost"],
        default_keyspace=keyspace,
        protocol_version=3,
        load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC1'),
        retry_connect=True)
    _cql = connection.get_session()

    management.create_keyspace_network_topology(keyspace, {'DC1': 1})
    management.sync_table(Article, keyspaces=[keyspace])

    _es = Elasticsearch(["localhost"],
                        scheme="http",
                        port=9200,
                        sniff_on_start=False,
                        sniff_on_connection_fail=True)

    if not _es.indices.exists(index=keyspace):
        print("PUT ES mapping")
        # Read the mapping inside a context manager so the handle is closed.
        with open('article-mapping.json') as mapping_file:
            mapping = json.load(mapping_file)
        _es.indices.create(keyspace, mapping)

    already_loaded = True
def init(self, config):
    """Read ``domsconfig.ini`` and create the DOMS keyspace and tables.

    The ``config`` argument is accepted but not used by this implementation.
    The configured protocol version is read and logged only; it is not passed
    to the cluster.
    """
    log = logging.getLogger(__name__)
    log.info("*** STARTING DOMS INITIALIZATION ***")

    domsconfig = ConfigParser.ConfigParser()
    domsconfig.read("webservice/algorithms/doms/domsconfig.ini")

    cassHost = domsconfig.get("cassandra", "host")
    cassKeyspace = domsconfig.get("cassandra", "keyspace")
    cassDatacenter = domsconfig.get("cassandra", "local_datacenter")
    cassVersion = domsconfig.get("cassandra", "protocol_version")

    log.info("Cassandra Host(s): %s" % (cassHost))
    log.info("Cassandra Keyspace: %s" % (cassKeyspace))
    log.info("Cassandra Datacenter: %s" % (cassDatacenter))
    log.info("Cassandra Protocol Version: %s" % (cassVersion))

    dc_policy = DCAwareRoundRobinPolicy(cassDatacenter)
    token_policy = TokenAwarePolicy(dc_policy)

    cluster = Cluster(cassHost.split(','), load_balancing_policy=token_policy)
    try:
        session = cluster.connect()
        self.createKeyspace(session, cassKeyspace)
        self.createTables(session)
    finally:
        # The session never leaves this method, so release the cluster's
        # connections once initialization is done, or has failed.
        cluster.shutdown()
def __init__(self, TESTING=None):
    """Connect to Cassandra using settings taken from environment variables.

    Production (``TESTING is None``) reads ``KEYSPACE`` and
    ``CONTACT_POINTS``; any other ``TESTING`` value reads ``TEST_KEYSPACE``
    and ``TEST_CONTACT_POINTS``. Contact points are comma-separated. A
    missing variable is printed and no cluster or session is created.
    """
    self.table = None
    profiles = None

    # Pick the environment variable names for the active mode.
    if TESTING is None:
        keyspace_var, contacts_var = 'KEYSPACE', 'CONTACT_POINTS'
    else:
        keyspace_var, contacts_var = 'TEST_KEYSPACE', 'TEST_CONTACT_POINTS'

    try:
        self._keyspace = os.environ[keyspace_var]
        self._contact_points = os.environ[contacts_var].split(",")
    except (KeyError) as err:
        print("KEY ERROR: ", err)
    else:
        # NOTE(review): this profile is built but never handed to Cluster;
        # ``profiles`` is still None below. Confirm whether it should be
        # registered as an execution profile.
        policy = ExecutionProfile(
            load_balancing_policy=DCAwareRoundRobinPolicy())
        self._cluster = Cluster(self._contact_points,
                                execution_profiles=profiles)
        self._session = self._cluster.connect(self._keyspace)
def _create_session(self):
    """Create the main cluster/session and fill the session pool.

    The main cluster supplies ``self.session`` plus one pooled session; each
    of the remaining ``SESSION_POOL_SIZE - 1`` pooled sessions gets its own
    cluster.
    """
    def new_cluster():
        # Every cluster uses the same hosts, DC-aware policy and port.
        return Cluster(
            self.hosts,
            load_balancing_policy=DCAwareRoundRobinPolicy(local_dc=DC),
            port=CASSANDRA_PORT)

    self.cluster = new_cluster()

    # create the keyspace if not exist
    self._create_keyspace()

    self.session = self.cluster.connect(self.keyspace)
    self.session_pool.append(self.cluster.connect(self.keyspace))

    # create multiple sessions
    for _ in range(SESSION_POOL_SIZE - 1):
        extra_cluster = new_cluster()
        self.session_pool.append(extra_cluster.connect(self.keyspace))
def connect(self):
    """Connect to the configured nodes and prepare the insert statement.

    The prepared insert statement is given consistency level ``ANY``.
    """
    _logger.debug("Connecting to %s", self.nodes_addresses)

    self.cluster = Cluster(
        self.nodes_addresses,
        load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
    )
    self.session = self.cluster.connect()

    prepared = self.session.prepare(INSERT_STATEMENT)
    self.prep_insert_statement = prepared
    self.prep_insert_statement.consistency_level = ConsistencyLevel.ANY
def test_full_integration_with_local_cassandra(self):
    """Run the Athena-to-AdWords ETL against a docker-compose Cassandra.

    Expects ``upload_all()`` to return an empty list.
    """
    aws_conn = AwsConnectionSettings(region="us-east-1", secrets_manager=None, profile="default")
    execfile("../../secrets.py")

    compose = DockerCompose(filepath=os.path.dirname(base.__file__))
    with compose:
        cassandra_host = compose.get_service_host("cassandra", 9042)
        cassandra_port = int(compose.get_service_port("cassandra", 9042))

        cassandra_conn_setting = CassandraConnectionSettings(
            cluster_ips=[cassandra_host],
            port=cassandra_port,
            load_balancing_policy=DCAwareRoundRobinPolicy(),
            secrets_manager=CassandraSecretsManager(
                source=DictKeyValueSource({
                    "CASSANDRA_USERNAME": "",
                    "CASSANDRA_PASSWORD": "",
                })),
        )
        conn = verify_container_is_up(cassandra_conn_setting)

        # source column -> AdWords conversion field
        column_mapping = {
            'google_click_id': 'googleClickId',
            'conversion_name': 'conversionName',
            'conversion_time': 'conversionTime',
            'conversion_value': 'conversionValue',
            'conversion_currency_code': 'conversionCurrencyCode'
        }

        settings = AthenaToAdWordsOfflineConversionSettings(
            source_database=os.getenv("dummy_athena_database"),
            source_table=os.getenv("dummy_athena_table"),
            source_connection_settings=aws_conn,
            etl_identifier="test",
            destination_batch_size=100,
            etl_state_manager_connection=cassandra_conn_setting,
            etl_state_manager_keyspace="test",
            transformation_column_mapping=column_mapping,
            destination_connection_settings=GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager()),
        )

        etl = AthenaToAdWordsOfflineConversion(settings)
        files_actual = etl.list_source_files()

        uploaded = etl.upload_all()
        self.assertListEqual(uploaded, [])
def __init__(self, signal):
    """Connect to the ``hdb`` keyspace and start the lookups for ``signal``.

    Stores ``signal`` as ``self.att``, builds the translated contact point
    list, connects, and issues ``self.conf_query`` and
    ``self.datatype_query`` asynchronously. The resulting futures are kept
    in ``self.id_future`` and ``self.datatype_future``.
    """
    self.att = signal
    self._att_id = None
    self._datatype = None

    DC_1 = [
        '172.16.2.69',
        '172.16.2.70',
        '172.16.2.71',
    ]
    DC_2 = ['172.16.2.66', '172.16.2.67', '172.16.2.68']

    # Simple Network mapper to resolve green and blue addresses
    class NetworkAdressTranslator(AddressTranslator):
        def __init__(self, addr_map=None):
            # An absent map means "translate nothing", not a crash on .get.
            self.addr_map = addr_map if addr_map is not None else {}

        def translate(self, addr):
            # Unmapped addresses pass through unchanged instead of becoming
            # None, which is not a usable address.
            return self.addr_map.get(addr, addr)

    # Blue to Green topology
    addr_map = {
        # Old nodes, need ip translation
        "172.16.2.31": "10.0.107.93",
        "172.16.2.32": "10.0.107.94",
        "172.16.2.33": "10.0.107.95",
        "172.16.2.34": "10.0.107.96",
        "172.16.2.50": "10.0.107.98",
        "172.16.2.51": "10.0.107.99",
        # New nodes, IP forwarding is configured on the network
        "172.16.2.66": "172.16.2.66",
        "172.16.2.67": "172.16.2.67",
        "172.16.2.68": "172.16.2.68",
        "172.16.2.69": "172.16.2.69",
        "172.16.2.70": "172.16.2.70",
        "172.16.2.71": "172.16.2.71",
    }

    # Convert cluster points to local network hosts
    self.hdb_cluster = [addr_map[host] for host in DC_1 + DC_2]
    self.translator = NetworkAdressTranslator(addr_map)

    cluster = Cluster(
        self.hdb_cluster,
        connect_timeout=1,
        address_translator=self.translator,
        load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC1'),
    )
    self.session = cluster.connect('hdb')

    self.id_future = self.session.execute_async(self.conf_query)
    self.datatype_future = self.session.execute_async(self.datatype_query)
def init(self, config):
    """Read the DOMS configuration and create the Cassandra keyspace and tables.

    Settings come from ``domsconfig.ini`` and are passed through
    ``self.override_config`` together with ``config``. The keyspace is
    created only when ``create_keyspace_granted`` is ``"True"``/``"true"``
    (the default when the option is absent); otherwise the existing keyspace
    is selected.

    :raises ValueError: if ``dc_policy`` is neither
        ``DCAwareRoundRobinPolicy`` nor ``WhiteListRoundRobinPolicy``.
    """
    log = logging.getLogger(__name__)
    log.info("*** STARTING DOMS INITIALIZATION ***")

    # SafeConfigParser was only a deprecated alias of ConfigParser and was
    # removed in Python 3.12.
    domsconfig = configparser.ConfigParser()
    domsconfig.read(DomsInitializer._get_config_files('domsconfig.ini'))
    domsconfig = self.override_config(domsconfig, config)

    cassHost = domsconfig.get("cassandra", "host")
    cassPort = domsconfig.get("cassandra", "port")
    cassUsername = domsconfig.get("cassandra", "username")
    cassPassword = domsconfig.get("cassandra", "password")
    cassKeyspace = domsconfig.get("cassandra", "keyspace")
    cassDatacenter = domsconfig.get("cassandra", "local_datacenter")
    cassVersion = int(domsconfig.get("cassandra", "protocol_version"))
    cassPolicy = domsconfig.get("cassandra", "dc_policy")
    try:
        cassCreateKeyspaceGranted = domsconfig.get(
            "cassandra", "create_keyspace_granted")
    except configparser.NoOptionError:
        cassCreateKeyspaceGranted = "True"

    log.info("Cassandra Host(s): %s", cassHost)
    log.info("Cassandra Keyspace: %s", cassKeyspace)
    log.info("Cassandra Datacenter: %s", cassDatacenter)
    log.info("Cassandra Protocol Version: %s", cassVersion)
    log.info("Cassandra DC Policy: %s", cassPolicy)

    # The host option is comma-separated; split once and reuse the list.
    cassHosts = cassHost.split(',')

    if cassPolicy == 'DCAwareRoundRobinPolicy':
        token_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy(cassDatacenter))
    elif cassPolicy == 'WhiteListRoundRobinPolicy':
        # Whitelist each host, not the raw comma-joined string.
        token_policy = WhiteListRoundRobinPolicy(cassHosts)
    else:
        # Fail with a clear message rather than an unbound-variable error.
        raise ValueError("Unsupported cassandra dc_policy: %r" % (cassPolicy,))

    if cassUsername and cassPassword:
        auth_provider = PlainTextAuthProvider(username=cassUsername,
                                              password=cassPassword)
    else:
        auth_provider = None

    with Cluster(cassHosts,
                 port=int(cassPort),
                 load_balancing_policy=token_policy,
                 protocol_version=cassVersion,
                 auth_provider=auth_provider) as cluster:
        session = cluster.connect()

        if cassCreateKeyspaceGranted in ["True", "true"]:
            self.createKeyspace(session, cassKeyspace)
        else:
            session.set_keyspace(cassKeyspace)

        self.createTables(session)
def cassandra_cluster():
    """Build a Cluster for the fixed contact points with a token-aware, DC-aware policy.

    Returns the Cluster. If ``Error`` is caught it is printed and ``None``
    is returned.
    """
    from cassandra.cluster import Cluster
    from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy, RetryPolicy

    cassandra_hosts = ['10.0.0.251', '10.0.0.250', '10.0.0.249']
    try:
        lb_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy(local_dc='Cassandra'))
        return Cluster(contact_points=cassandra_hosts,
                       load_balancing_policy=lb_policy,
                       default_retry_policy=RetryPolicy())
    except Error as er:
        print(er)
    return None
def __init__(self, config):
    """Open a Cassandra session from the ``cassandra`` section of ``config``.

    The section must provide ``username``, ``password``, ``hosts``,
    ``local_dc`` and ``keyspace``.
    """
    settings = config['cassandra']

    credentials = cassandra.auth.PlainTextAuthProvider(
        username=settings['username'],
        password=settings['password'])
    lb_policy = DCAwareRoundRobinPolicy(local_dc=settings['local_dc'])

    self.cluster = cassandra.cluster.Cluster(
        protocol_version=3,
        contact_points=settings['hosts'],
        load_balancing_policy=lb_policy,
        auth_provider=credentials)
    self.session = self.cluster.connect(settings['keyspace'])
def main():
    """Drop and recreate ``KEYSPACE`` with the URL-shortener tables.

    Creates the ``short_long``, ``long_short`` and ``distributed_counter``
    tables. The cluster is shut down even when a statement fails.
    """
    cluster = Cluster(["cassandra"], load_balancing_policy=DCAwareRoundRobinPolicy(), port=9042)
    try:
        session = cluster.connect()

        # Check if KEYSPACE already exists and DROP it
        rows = session.execute("SELECT keyspace_name FROM system_schema.keyspaces")
        if KEYSPACE in [row[0] for row in rows]:
            log.info("Dropping existing keyspace...")
            session.execute("DROP KEYSPACE " + KEYSPACE)

        # Recreate KEYSPACE
        log.info("Creating keyspace...")
        session.execute(""" CREATE KEYSPACE %s WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' } """ % KEYSPACE)

        log.info("Setting keyspace...")
        session.set_keyspace(KEYSPACE)

        # Create 'short_long' table
        session.execute(""" CREATE TABLE short_long ( short_url text, long_url text, PRIMARY KEY (short_url) ) """)

        # Create 'long_short' table
        session.execute(""" CREATE TABLE long_short ( long_url text, short_url text, PRIMARY KEY (long_url) ) """)

        # Create mock counter - placeholder for distributed counter
        session.execute(""" CREATE TABLE distributed_counter ( machine_id text PRIMARY KEY, increment counter ) """)
    finally:
        # Close connection to cluster, also on failure, so nothing is left open.
        log.info("Shutting down cluster...")
        cluster.shutdown()
def connect_cassandra():
    """Build and return a Cluster for the AWS_VPC_US_WEST_2 contact points.

    The cluster is only constructed here; no session is opened.
    """
    # AWS_VPC_US_WEST_2 (Amazon Web Services (VPC))
    contact_points = ["35.165.161.176", "52.10.200.87", "35.163.204.211"]

    # your local data centre
    lb_policy = DCAwareRoundRobinPolicy(local_dc='AWS_VPC_US_WEST_2')
    credentials = PlainTextAuthProvider(username='******', password='******')

    return Cluster(
        contact_points=contact_points,
        load_balancing_policy=lb_policy,
        port=9042,
        auth_provider=credentials,
    )
def connect_to_cassandra(self, host: str, port: Union[int, str] = 9042, alias: str = None, keyspace: str = None,
                         username: str = None, password: str = '') -> Session:
    """
    Connect to Apache Cassandra cluster.

    AllowAllAuthenticator and PasswordAuthenticator are supported as authentication backend.
    This setting should be in configuration file cassandra.yaml:
    by default:
    | authenticator: AllowAllAuthenticator
    or for password authentication:
    | authenticator: PasswordAuthenticator

    *Args:*\n
        _host_ - IP address or host name of a cluster node;\n
        _port_ - connection port;\n
        _alias_ - connection alias;\n
        _keyspace_ - the name of the keyspace that the UDT is defined in;\n
        _username_ - username to connect to cassandra
        _password_ - password for username

    *Returns:*\n
        Index of current connection.

    *Example:*\n
    | Connect To Cassandra  |  192.168.1.108  |  9042  |  alias=cluster1 |
    """
    logger.info('Connecting using : host={0}, port={1}, alias={2}'.format(host, port, alias))
    try:
        # Credentials are only sent when a username is supplied.
        auth_provider = None
        if username:
            auth_provider = PlainTextAuthProvider(username=username, password=password)

        lb_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy())
        cluster = Cluster([host], port=int(port), auth_provider=auth_provider,
                          load_balancing_policy=lb_policy)

        session = cluster.connect()
        if keyspace is not None:
            session.set_keyspace(keyspace)

        self._connection = session
        return self._cache.register(self._connection, alias)
    except Exception as e:
        raise Exception('Connect to Cassandra error: {0}'.format(e))
def __init__(self, num_classes, aug, table, label_col, data_col, id_col,
             username, cass_pass, cassandra_ips, thread_par=32, port=9042):
    """Store the loader settings, connect to Cassandra and prepare the row query.

    The prepared statement selects ``label_col`` and ``data_col`` from
    ``table`` for a single ``id_col`` value.
    """
    # dataset description
    self.num_classes = num_classes
    self.aug = aug
    self.label_col = label_col
    self.data_col = data_col
    self.id_col = id_col

    # synchronisation primitives and bookkeeping
    self.finished_event = threading.Event()
    self.lock = threading.Lock()
    self.thread_par = thread_par
    self.tot = None
    self.cow = 0
    self.onair = 0
    self.errors = []
    self.feats = []
    self.labels = []
    self.perm = []
    self.bb = None

    ## multi-label when num_classes is small
    self.multi_label = (num_classes <= _max_multilabs)

    ## cassandra parameters
    dict_profile = ExecutionProfile(
        load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
        row_factory=cassandra.query.dict_factory)
    credentials = PlainTextAuthProvider(username=username, password=cass_pass)
    self.cluster = Cluster(cassandra_ips,
                           execution_profiles={'dict': dict_profile},
                           protocol_version=4,
                           auth_provider=credentials,
                           port=port)
    self.cluster.connect_timeout = 10  # seconds
    self.sess = self.cluster.connect()

    self.table = table
    query = f"SELECT {self.label_col}, {self.data_col} \
            FROM {self.table} WHERE {self.id_col}=?"
    self.prep = self.sess.prepare(query)
def test_no_live_nodes(self):
    """Ensure the query plan for a fully downed cluster is generated without errors."""
    downed_hosts = [Host(idx, SimpleConvictionPolicy) for idx in range(4)]
    for downed_host in downed_hosts:
        downed_host.set_location_info("dc1", "rack1")

    dc_policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
    dc_policy.populate(Mock(), downed_hosts)

    # Mark every host as down before asking for a plan.
    for downed_host in downed_hosts:
        dc_policy.on_down(downed_host)

    plan = list(dc_policy.make_query_plan())
    self.assertEqual(plan, [])
def test_default_dc(self):
    """Check how a policy created without local_dc picks it up from added hosts."""
    host_local = Host(1, SimpleConvictionPolicy, 'local')
    host_remote = Host(2, SimpleConvictionPolicy, 'remote')
    host_none = Host(1, SimpleConvictionPolicy)

    # contact point is '1'
    cluster = Mock(contact_points=[1])

    def fresh_policy():
        # A new policy populated with the DC-less host has no local_dc yet.
        candidate = DCAwareRoundRobinPolicy()
        candidate.populate(cluster, [host_none])
        self.assertFalse(candidate.local_dc)
        return candidate

    # contact DC first, then contact DC second
    for add_order in ((host_local, host_remote), (host_remote, host_local)):
        policy = fresh_policy()
        for added in add_order:
            policy.on_add(added)
        self.assertNotEqual(policy.local_dc, host_remote.datacenter)
        self.assertEqual(policy.local_dc, host_local.datacenter)

    # no DC, then only other DC
    for lone_host in (host_none, host_remote):
        policy = fresh_policy()
        policy.on_add(lone_host)
        self.assertFalse(policy.local_dc)
def test_modification_during_generation(self):
    """Check query-plan lengths when hosts change while a plan is being consumed.

    Hosts 0-1 are in dc1 (local) and hosts 2-3 in dc2, with up to three hosts
    used per remote DC. Each scenario creates a plan, optionally advances it
    with ``next()``, raises on_up/on_down events, and asserts how many hosts
    the rest of the plan yields. Expected counts are written as
    ``<local> + <remote>``.
    """
    hosts = [Host(i, SimpleConvictionPolicy) for i in range(4)]
    for h in hosts[:2]:
        h.set_location_info("dc1", "rack1")
    for h in hosts[2:]:
        h.set_location_info("dc2", "rack1")

    policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=3)
    policy.populate(Mock(), hosts)

    # The general concept here is to change the internal state of the
    # policy during plan generation. In this case we use a grey-box
    # approach that changes specific things during known phases of the
    # generator.

    new_host = Host(4, SimpleConvictionPolicy)
    new_host.set_location_info("dc1", "rack1")

    # new local before iteration
    plan = policy.make_query_plan()
    policy.on_up(new_host)
    # local list is not bound yet, so we get to see that one
    self.assertEqual(len(list(plan)), 3 + 2)

    # remove local before iteration
    plan = policy.make_query_plan()
    policy.on_down(new_host)
    # local list is not bound yet, so we don't see it
    self.assertEqual(len(list(plan)), 2 + 2)

    # new local after starting iteration
    plan = policy.make_query_plan()
    next(plan)
    policy.on_up(new_host)
    # local list is bound, and one consumed, so we only see the other original
    self.assertEqual(len(list(plan)), 1 + 2)

    # remove local after traversing available
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_down(new_host)
    # we should be past the local list
    self.assertEqual(len(list(plan)), 0 + 2)

    # REMOTES CHANGE
    new_host.set_location_info("dc2", "rack1")

    # new remote after traversing local, but not starting remote
    plan = policy.make_query_plan()
    for _ in range(2):
        next(plan)
    policy.on_up(new_host)
    # list is updated before we get to it
    self.assertEqual(len(list(plan)), 0 + 3)

    # remove remote after traversing local, but not starting remote
    plan = policy.make_query_plan()
    for _ in range(2):
        next(plan)
    policy.on_down(new_host)
    # list is updated before we get to it
    self.assertEqual(len(list(plan)), 0 + 2)

    # new remote after traversing local, and starting remote
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_up(new_host)
    # slice is already made, and we've consumed one
    self.assertEqual(len(list(plan)), 0 + 1)

    # remove remote after traversing local, and starting remote
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_down(new_host)
    # slice is created with all present, and we've consumed one
    self.assertEqual(len(list(plan)), 0 + 2)

    # local DC disappears after finishing it, but not starting remote
    plan = policy.make_query_plan()
    for _ in range(2):
        next(plan)
    policy.on_down(hosts[0])
    policy.on_down(hosts[1])
    # dict traversal starts as normal
    self.assertEqual(len(list(plan)), 0 + 2)
    # bring the local hosts back for the next scenario
    policy.on_up(hosts[0])
    policy.on_up(hosts[1])

    # PYTHON-297 addresses the following cases, where DCs come and go
    # during generation

    # local DC disappears after finishing it, and starting remote
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_down(hosts[0])
    policy.on_down(hosts[1])
    # dict traversal has begun and consumed one
    self.assertEqual(len(list(plan)), 0 + 1)
    # bring the local hosts back for the next scenario
    policy.on_up(hosts[0])
    policy.on_up(hosts[1])

    # remote DC disappears after finishing local, but not starting remote
    plan = policy.make_query_plan()
    for _ in range(2):
        next(plan)
    policy.on_down(hosts[2])
    policy.on_down(hosts[3])
    # nothing left
    self.assertEqual(len(list(plan)), 0 + 0)
    # bring the remote hosts back for the next scenario
    policy.on_up(hosts[2])
    policy.on_up(hosts[3])

    # remote DC disappears while traversing it
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_down(hosts[2])
    policy.on_down(hosts[3])
    # we continue with remainder of original list
    self.assertEqual(len(list(plan)), 0 + 1)
    # bring the remote hosts back for the next scenario
    policy.on_up(hosts[2])
    policy.on_up(hosts[3])

    # two hosts in a third data center, dc3
    another_host = Host(5, SimpleConvictionPolicy)
    another_host.set_location_info("dc3", "rack1")
    new_host.set_location_info("dc3", "rack1")

    # new DC while traversing remote
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    policy.on_up(new_host)
    policy.on_up(another_host)
    # we continue with remainder of original list
    self.assertEqual(len(list(plan)), 0 + 1)

    # remote DC disappears after finishing it
    plan = policy.make_query_plan()
    for _ in range(3):
        next(plan)
    last_host_in_this_dc = next(plan)
    # take down whichever remote DC the plan has just finished
    if last_host_in_this_dc in (new_host, another_host):
        down_hosts = [new_host, another_host]
    else:
        down_hosts = hosts[2:]
    for h in down_hosts:
        policy.on_down(h)
    # the last DC has two
    self.assertEqual(len(list(plan)), 0 + 2)
def test_status_updates(self):
    """
    Feed on_down/on_remove/on_up/on_add notifications to the policy and
    check that query plans and distances reflect each change.
    """
    # two hosts in the local dc ("dc1"), two in a remote dc ("dc2")
    hosts = []
    for idx in range(4):
        node = Host(idx, SimpleConvictionPolicy)
        node.set_location_info("dc1" if idx < 2 else "dc2", "rack1")
        hosts.append(node)
    downed_local, surviving_local, removed_remote, surviving_remote = hosts

    policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
    policy.populate(Mock(), hosts)

    # drop one host from each dc
    policy.on_down(downed_local)
    policy.on_remove(removed_remote)

    # bring in a replacement local host ...
    new_local_host = Host(4, SimpleConvictionPolicy)
    new_local_host.set_location_info("dc1", "rack1")
    policy.on_up(new_local_host)

    # ... and a host living in a dc the policy has not seen before
    new_remote_host = Host(5, SimpleConvictionPolicy)
    new_remote_host.set_location_info("dc9000", "rack1")
    policy.on_add(new_remote_host)

    # the plan now holds two local hosts followed by two remote hosts,
    # the remote ones sitting in separate dcs
    qplan = list(policy.make_query_plan())
    local_part, remote_part = qplan[:2], qplan[2:]
    self.assertEqual(set(local_part), {surviving_local, new_local_host})
    self.assertEqual(set(remote_part), {surviving_remote, new_remote_host})

    # dc9000 has a known host, so that host must not be IGNORED
    self.assertEqual(policy.distance(new_remote_host), HostDistance.REMOTE)

    # with every local host down only the remote hosts are left
    policy.on_down(new_local_host)
    policy.on_down(surviving_local)
    qplan = list(policy.make_query_plan())
    self.assertEqual(set(qplan), {surviving_remote, new_remote_host})

    # with the remote hosts down as well the plan is empty
    policy.on_down(new_remote_host)
    policy.on_down(surviving_remote)
    qplan = list(policy.make_query_plan())
    self.assertEqual(qplan, [])
def test_get_distance(self):
    """
    Check the HostDistance reported for local and remote hosts as
    used_hosts_per_remote_dc and the populated host list change.
    """
    def host_in(endpoint, datacenter):
        # build a host and place it in the given dc (rack is always "rack1")
        node = Host(endpoint, SimpleConvictionPolicy)
        node.set_location_info(datacenter, "rack1")
        return node

    host = host_in("ip1", "dc1")
    remote_host = host_in("ip2", "dc2")
    second_remote_host = host_in("ip3", "dc2")

    policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=0)
    policy.populate(Mock(), [host])
    self.assertEqual(policy.distance(host), HostDistance.LOCAL)

    # used_hosts_per_remote_dc is 0, so the remote host is ignored
    self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

    # raising the limit is not enough: dc2 isn't registered in the
    # policy's live_hosts dict yet
    policy.used_hosts_per_remote_dc = 1
    self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

    # once the policy has both dcs registered the host becomes REMOTE
    policy.populate(Mock(), [host, remote_host])
    self.assertEqual(policy.distance(remote_host), HostDistance.REMOTE)

    # used_hosts_per_remote_dc is 1, so only one host in dc2 is REMOTE
    # and the other one is IGNORED
    policy.populate(Mock(), [host, remote_host, second_remote_host])
    distances = {
        policy.distance(candidate)
        for candidate in (remote_host, second_remote_host)
    }
    self.assertEqual(distances, {HostDistance.REMOTE, HostDistance.IGNORED})
def test_with_remotes(self):
    """
    Check how many remote hosts end up in the query plan for
    used_hosts_per_remote_dc values of 2, 1 and 0.
    """
    hosts = []
    for idx in range(4):
        node = Host(idx, SimpleConvictionPolicy)
        node.set_location_info("dc1" if idx < 2 else "dc2", "rack1")
        hosts.append(node)

    local_hosts = {node for node in hosts if node.datacenter == "dc1"}
    remote_hosts = {node for node in hosts if node.datacenter != "dc1"}

    def plan_with(remote_limit):
        # fresh policy populated with all four hosts, plan fully consumed
        policy = DCAwareRoundRobinPolicy(
            "dc1", used_hosts_per_remote_dc=remote_limit)
        policy.populate(Mock(), hosts)
        return list(policy.make_query_plan())

    # every remote host may be used
    qplan = plan_with(2)
    self.assertEqual(set(qplan[:2]), local_hosts)
    self.assertEqual(set(qplan[2:]), remote_hosts)

    # a single remote host may be used
    qplan = plan_with(1)
    self.assertEqual(set(qplan[:2]), local_hosts)
    used_remotes = set(qplan[2:])
    self.assertEqual(1, len(used_remotes))
    self.assertIn(qplan[2], remote_hosts)

    # no remote host may be used
    qplan = plan_with(0)
    self.assertEqual(2, len(qplan))
    self.assertEqual(local_hosts, set(qplan))