def test_no_nodes(self):
        """
        A policy populated with no hosts must produce an empty query plan
        without raising.
        """
        lb_policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
        lb_policy.populate(None, [])

        # Materialize the plan so any error raised while iterating surfaces here.
        self.assertEqual(list(lb_policy.make_query_plan()), [])
示例#2
0
    def test_no_nodes(self):
        """
        Building a query plan for a cluster without hosts must succeed and
        yield nothing.
        """
        empty_cluster_policy = DCAwareRoundRobinPolicy(
            "dc1", used_hosts_per_remote_dc=1)
        empty_cluster_policy.populate(None, [])

        plan = list(empty_cluster_policy.make_query_plan())
        self.assertEqual(plan, [])
    def test_no_remote(self):
        """
        With four hosts all located in the local DC ("dc1"), the query plan
        must consist of exactly those hosts (order is not checked).
        """
        local_hosts = []
        for idx in range(4):
            node = Host(idx, SimpleConvictionPolicy)
            node.set_location_info("dc1", "rack1")
            local_hosts.append(node)

        lb_policy = DCAwareRoundRobinPolicy("dc1")
        lb_policy.populate(None, local_hosts)

        plan = list(lb_policy.make_query_plan())
        self.assertEqual(sorted(plan), sorted(local_hosts))
示例#4
0
    def test_no_remote(self):
        """
        Populate the policy with local-DC hosts only and check that the
        query plan and the host list hold the same elements.
        """
        def make_local_host(address):
            # Every host is placed in dc1/rack1, i.e. the policy's local DC.
            new_host = Host(address, SimpleConvictionPolicy)
            new_host.set_location_info("dc1", "rack1")
            return new_host

        hosts = [make_local_host(i) for i in range(4)]

        policy = DCAwareRoundRobinPolicy("dc1")
        policy.populate(None, hosts)
        self.assertEqual(sorted(list(policy.make_query_plan())), sorted(hosts))
示例#5
0
    def setup_tables(self):
        """
        Configure the cqlengine connection and sync the model tables.

        Reads the keyspace, the comma-separated host list and the optional
        credentials from the STORAGE config domain, registers a default and a
        "batch" execution profile, calls ``connection.setup`` and finally
        syncs ``MessageModel`` and ``AttachmentModel``.
        """
        key_space = self.env.config.get(ConfigKeys.KEY_SPACE,
                                        domain=ConfigKeys.STORAGE)
        contact_points = self.env.config.get(
            ConfigKeys.HOST, domain=ConfigKeys.STORAGE).split(",")

        # required to specify execution profiles in future versions;
        # the default is overridden so the consistency level can be set later
        default_profile = ExecutionProfile(
            load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
            retry_policy=RetryPolicy(),
            request_timeout=10.0,
            row_factory=Session._row_factory,  # noqa
            # should probably be changed to QUORUM when having more than 3
            # nodes in the cluster
            consistency_level=ConsistencyLevel.LOCAL_ONE,
        )

        # TODO: there doesn't seem to be a way to specify execution profile
        #  when using the library's object mapping approach, only when
        #  writing pure cql queries:
        #  https://docs.datastax.com/en/developer/python-driver/3.24/execution_profiles/
        # batch profile has longer timeout since they are run async anyway
        batch_profile = ExecutionProfile(
            load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
            request_timeout=120.0,
            consistency_level=ConsistencyLevel.LOCAL_ONE,
        )

        setup_options = {
            "default_keyspace": key_space,
            "protocol_version": 3,
            "retry_connect": True,
            "execution_profiles": {
                EXEC_PROFILE_DEFAULT: default_profile,
                "batch": batch_profile,
            },
        }

        username = self._get_from_conf(ConfigKeys.USER, ConfigKeys.STORAGE)
        password = self._get_from_conf(ConfigKeys.PASSWORD, ConfigKeys.STORAGE)

        # Authentication is only configured when a password is present.
        if password is not None:
            setup_options["auth_provider"] = PlainTextAuthProvider(
                username=username,
                password=password,
            )

        connection.setup(contact_points, **setup_options)

        for model in (MessageModel, AttachmentModel):
            sync_table(model)
示例#6
0
 def test_DC_aware_round_robin_policy(self):
     """
     Check the insights serialization of DCAwareRoundRobinPolicy, first with
     default arguments and then with explicit options.
     """
     expected_default = {
         'namespace': 'cassandra.policies',
         'options': {'local_dc': '', 'used_hosts_per_remote_dc': 0},
         'type': 'DCAwareRoundRobinPolicy',
     }
     self.assertEqual(
         insights_registry.serialize(DCAwareRoundRobinPolicy()),
         expected_default)

     configured_policy = DCAwareRoundRobinPolicy(
         local_dc='fake_local_dc', used_hosts_per_remote_dc=15)
     expected_configured = {
         'namespace': 'cassandra.policies',
         'options': {'local_dc': 'fake_local_dc',
                     'used_hosts_per_remote_dc': 15},
         'type': 'DCAwareRoundRobinPolicy',
     }
     self.assertEqual(
         insights_registry.serialize(configured_policy),
         expected_configured)
示例#7
0
    def init(self, config):
        """
        Initialize the DOMS Cassandra schema.

        Loads the packaged ``domsconfig.ini``, logs the Cassandra settings it
        contains, connects with a token-aware / DC-aware load balancing policy
        and then creates the keyspace and tables. The cluster is used as a
        context manager for the duration of the schema setup.

        :param config: not used by this method body.
        """
        logger = logging.getLogger(__name__)
        logger.info("*** STARTING DOMS INITIALIZATION ***")

        parser = ConfigParser.RawConfigParser()
        ini_stream = pkg_resources.resource_stream(__name__, "domsconfig.ini")
        parser.readfp(ini_stream, filename='domsconfig.ini')

        cass_host = parser.get("cassandra", "host")
        cass_keyspace = parser.get("cassandra", "keyspace")
        cass_datacenter = parser.get("cassandra", "local_datacenter")
        cass_version = int(parser.get("cassandra", "protocol_version"))

        logger.info("Cassandra Host(s): %s" % cass_host)
        logger.info("Cassandra Keyspace: %s" % cass_keyspace)
        logger.info("Cassandra Datacenter: %s" % cass_datacenter)
        logger.info("Cassandra Protocol Version: %s" % cass_version)

        lb_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy(cass_datacenter))

        # "host" may hold several comma-separated contact points.
        contact_points = cass_host.split(',')
        with Cluster(contact_points,
                     load_balancing_policy=lb_policy,
                     protocol_version=cass_version) as cluster:
            session = cluster.connect()

            self.createKeyspace(session, cass_keyspace)
            self.createTables(session)
示例#8
0
 def _production_connect(self):
     """
     Set up the cqlengine connection with the production settings.

     Assumes a multiple node Cassandra cluster. Hosts, keyspace, port,
     CQL version and connect timeout come from the instance; every other
     option is fixed below.
     """
     # Queries are routed to the local DC 'dc1' only: no remote hosts are used.
     lb_policy = DCAwareRoundRobinPolicy(local_dc='dc1',
                                         used_hosts_per_remote_dc=0)
     setup_options = dict(
         hosts=self.hosts,
         default_keyspace=self.keyspace,
         consistency=CL.LOCAL_QUORUM,
         port=self.port,
         cql_version=self.cql_version,
         lazy_connect=True,
         retry_connect=False,
         compression=True,
         auth_provider=None,
         load_balancing_policy=lb_policy,
         protocol_version=4,
         executor_threads=2,
         reconnection_policy=None,
         default_retry_policy=None,
         conviction_policy_factory=None,
         metrics_enabled=False,
         connection_class=None,
         ssl_options=None,
         sockopts=None,
         max_schema_agreement_wait=10,
         control_connection_timeout=2.0,
         idle_heartbeat_interval=30,
         schema_event_refresh_window=2,
         topology_event_refresh_window=10,
         connect_timeout=self.connect_timeout,
     )
     connection.setup(**setup_options)
    async def connect(cls,
                      contact_points=None,
                      keyspace=None,
                      port=None,
                      username=None,
                      password=None):
        """
        Set connection.

        Stores the given parameters on the class, validates them, then builds
        the cluster and session and keeps them as ``cls.cluster`` and
        ``cls.session``.

        :param contact_points: list of contact points [cp1, cp2, ..., cpN]
        :param keyspace: keyspace name
        :param port: port
        :param username: username
        :param password: password
        :raises Exception: if parameter validation, cluster creation or the
            session connection fails; the original error is attached as the
            exception's ``__cause__``.
        """
        cls.logger.debug("Setting connection")
        cls._set_connection_parameters(contact_points, keyspace, port,
                                       username, password)
        try:
            cls._check_connection_parameters()
            cls.auth = PlainTextAuthProvider(username=cls.username,
                                             password=cls.password)
            lbp = DCAwareRoundRobinPolicy()
            cls.cluster = Cluster(cls.contact_points,
                                  auth_provider=cls.auth,
                                  port=cls.port,
                                  load_balancing_policy=lbp)
            cls.session = cls.cluster.connect(cls.keyspace)
            cls.logger.debug("Connection set")
        except Exception as err:
            message = "Connection failed: " + str(err)
            cls.logger.error(message)
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise Exception(message) from err
示例#10
0
    def conn(self, *, ssl_path: str = None) -> Session:  # type: ignore
        """
        Return the Cassandra session, creating it on first use.

        The session is cached in ``self._session``; later calls return the
        cached object and ignore ``ssl_path``.

        :param ssl_path: CA certificate path; when given, SSL options are
            passed to the cluster, otherwise no SSL options are used.
        :return: the connected session bound to ``self.keyspace``.
        """
        if self._session:
            return self._session

        # Credentials are only sent when a user name is configured.
        auth_provider = None
        if self.user is not None:
            auth_provider = PlainTextAuthProvider(username=self.user,
                                                  password=self.password)

        ssl_opts = None
        if ssl_path is not None:
            ssl_opts = {
                "ca_certs": ssl_path,
                "ssl_version": PROTOCOL_TLSv1,
                "cert_reqs": CERT_REQUIRED,
            }

        default_profile = ExecutionProfile(
            load_balancing_policy=DCAwareRoundRobinPolicy(),
            consistency_level=ConsistencyLevel.LOCAL_QUORUM,
            row_factory=dict_factory,
        )
        cluster = Cluster(
            contact_points=self.host,
            auth_provider=auth_provider,
            ssl_options=ssl_opts,
            execution_profiles={EXEC_PROFILE_DEFAULT: default_profile},
        )
        self._session = cluster.connect(self.keyspace)
        return self._session
示例#11
0
async def init(loop):
    """
    Build the web application and its Cassandra-backed store.

    Loads ``config/config.yml``, connects to the configured Cassandra host,
    selects the keyspace, wraps the session with ``aiosession`` and registers
    the HTTP routes.

    :param loop: event loop handed to ``web.Application``.
    :return: tuple ``(app, host, port)`` where host/port come from the
        ``api`` section of the configuration.
    """
    config = load_config(PROJECT_ROOT / 'config' / 'config.yml')
    cassandra_cfg = config["cassandra"]

    # Cassandra cluster connection
    cluster = Cluster(
        [cassandra_cfg["host"]],
        load_balancing_policy=DCAwareRoundRobinPolicy(),
        port=cassandra_cfg["port"])
    session = cluster.connect()
    session.set_keyspace(cassandra_cfg["keyspace"])

    # Threaded Cassandra wrapper for asyncio
    aiosession(session)

    # Database store shared by the route handlers
    db_store = DbStore(session, config)

    # Server application and routes
    app = web.Application(loop=loop)
    register_routes(app, RoutesHandler(db_store, config))

    api_cfg = config["api"]
    return app, api_cfg["host"], api_cfg["port"]
示例#12
0
    def establish_connection(self):
        """
        Open a Cassandra session and switch it to the configured keyspace.

        :return: True when the session was created and the keyspace was set;
            False when any exception was raised, in which case the exception
            text is stored in ``self.response["data"]`` and logged.
        """
        self._log.log("establishing connection with config : " +
                      json.dumps(self.__repr__()))
        try:
            credentials = PlainTextAuthProvider(username=self._user_name,
                                                password=self._password)
            cluster_options = dict(
                contact_points=self._host,
                port=self._port,
                auth_provider=credentials,
                max_schema_agreement_wait=300,
                control_connection_timeout=10,
                connect_timeout=30,
                load_balancing_policy=DCAwareRoundRobinPolicy(),
            )
            self._session = Cluster(**cluster_options).connect()
            self._session.set_keyspace(self._key_space)
            self._log.log("connection established")
            return True
        except Exception as exe:
            # Any failure is reported to the caller through the response dict.
            self.response["data"] = str(exe)
            self._log.log("Invalid credentials or smthin went wrong",
                          error=str(exe))
            return False
    def test_get_distance(self):
        """
        Same test as DCAwareRoundRobinPolicyTest.test_get_distance()
        Except a FakeCluster is needed for the metadata variable and
        policy.child_policy is needed to change child policy settings
        """
        child = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=0)
        policy = TokenAwarePolicy(child)

        local_host = Host("ip1", SimpleConvictionPolicy)
        local_host.set_location_info("dc1", "rack1")
        policy.populate(self.FakeCluster(), [local_host])
        self.assertEqual(policy.distance(local_host), HostDistance.LOCAL)

        # used_hosts_per_remote_dc is set to 0, so ignore it
        remote_host = Host("ip2", SimpleConvictionPolicy)
        remote_host.set_location_info("dc2", "rack1")
        self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

        # dc2 isn't registered in the policy's live_hosts dict
        policy._child_policy.used_hosts_per_remote_dc = 1
        self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

        # make sure the policy has both dcs registered
        policy.populate(self.FakeCluster(), [local_host, remote_host])
        self.assertEqual(policy.distance(remote_host), HostDistance.REMOTE)

        # since used_hosts_per_remote_dc is set to 1, only the first
        # remote host in dc2 will be REMOTE, the rest are IGNORED
        second_remote_host = Host("ip3", SimpleConvictionPolicy)
        second_remote_host.set_location_info("dc2", "rack1")
        policy.populate(self.FakeCluster(),
                        [local_host, remote_host, second_remote_host])
        observed = {policy.distance(remote_host),
                    policy.distance(second_remote_host)}
        self.assertEqual(observed, {HostDistance.REMOTE, HostDistance.IGNORED})
    def connect_cassandra(self, keyspace):
        '''Connects cassandra using class parameters

        Generator that builds the cluster, opens a session on ``keyspace``,
        yields this object once, and shuts the cluster down when the
        generator finishes or fails.
        NOTE(review): presumably wrapped with ``contextlib.contextmanager``
        outside the visible code - confirm.

        Args:
            keyspace: keyspace to select on the new session.

        Yields:
            obj: yields class object
        '''

        try:
            # The stored password is base64-encoded; decode it to text first.
            auth_provider = PlainTextAuthProvider(username=self.user,
                                                  password=base64.b64decode(
                                                      self.pwd).decode())

            self.cluster = Cluster(
                contact_points=[self.ip_address],
                load_balancing_policy=DCAwareRoundRobinPolicy(
                    local_dc='datacenter1'),
                port=self.port,
                auth_provider=auth_provider,
                control_connection_timeout=100,
                protocol_version=3,)

            self.logger.debug(self.cluster)

            self.session = self.cluster.connect()
            self.session.set_keyspace(keyspace)
            self.logger.info('Cassandra connection is established.')

            yield self
        finally:
            # If setup failed before the Cluster was assigned, the attribute
            # may not exist; reading it directly would raise AttributeError
            # and hide the original error.
            cluster = getattr(self, 'cluster', None)
            if cluster is not None and not cluster.is_shutdown:
                cluster.shutdown()
                self.logger.info('Cassandra connection is closed.')
示例#15
0
    def get_lb_policy(policy_name: str, policy_args: Dict[str, Any]) -> Policy:
        """
        Build the load balancing policy selected by name.

        :param policy_name: Name of the policy to use. Unknown names fall back
            to ``RoundRobinPolicy``.
        :param policy_args: Parameters for the policy. Keys read here:
            ``local_dc`` / ``used_hosts_per_remote_dc`` (DC-aware), ``hosts``
            (white list), ``child_load_balancing_policy`` /
            ``child_load_balancing_policy_args`` (token-aware).
        :raises Exception: for ``WhiteListRoundRobinPolicy`` without hosts.
        """
        if policy_name == 'DCAwareRoundRobinPolicy':
            return DCAwareRoundRobinPolicy(
                policy_args.get('local_dc', ''),
                int(policy_args.get('used_hosts_per_remote_dc', 0)),
            )

        if policy_name == 'WhiteListRoundRobinPolicy':
            white_list = policy_args.get('hosts')
            if not white_list:
                raise Exception('Hosts must be specified for WhiteListRoundRobinPolicy')
            return WhiteListRoundRobinPolicy(white_list)

        if policy_name == 'TokenAwarePolicy':
            child_name = policy_args.get('child_load_balancing_policy', 'RoundRobinPolicy')
            child_args = policy_args.get('child_load_balancing_policy_args', {})
            supported_children = (
                'RoundRobinPolicy',
                'DCAwareRoundRobinPolicy',
                'WhiteListRoundRobinPolicy',
            )
            if child_name in supported_children:
                # The child is built by the same factory, one level down.
                return TokenAwarePolicy(
                    CassandraHook.get_lb_policy(child_name, child_args))
            # Unsupported child policies are replaced by plain round robin.
            return TokenAwarePolicy(RoundRobinPolicy())

        # Fallback to default RoundRobinPolicy
        return RoundRobinPolicy()
示例#16
0
    def get_lb_policy(policy_name, policy_args):
        """
        Build the load balancing policy selected by ``policy_name``.

        Unknown names and 'RoundRobinPolicy' both yield a plain
        ``RoundRobinPolicy``. ``policy_args`` is a dict whose relevant keys
        depend on the policy: ``local_dc`` / ``used_hosts_per_remote_dc``,
        ``hosts``, or ``child_load_balancing_policy`` (+ ``..._args``).
        Raises ``Exception`` when a white-list policy is requested without
        hosts.
        """
        known_policies = {
            'RoundRobinPolicy',
            'DCAwareRoundRobinPolicy',
            'WhiteListRoundRobinPolicy',
            'TokenAwarePolicy',
        }

        if policy_name not in known_policies or policy_name == 'RoundRobinPolicy':
            return RoundRobinPolicy()

        if policy_name == 'DCAwareRoundRobinPolicy':
            dc_name = policy_args.get('local_dc', '')
            remote_quota = int(policy_args.get('used_hosts_per_remote_dc', 0))
            return DCAwareRoundRobinPolicy(dc_name, remote_quota)

        if policy_name == 'WhiteListRoundRobinPolicy':
            allowed_hosts = policy_args.get('hosts')
            if not allowed_hosts:
                raise Exception('Hosts must be specified for WhiteListRoundRobinPolicy')
            return WhiteListRoundRobinPolicy(allowed_hosts)

        if policy_name == 'TokenAwarePolicy':
            child_name = policy_args.get('child_load_balancing_policy',
                                         'RoundRobinPolicy')
            child_args = policy_args.get('child_load_balancing_policy_args', {})
            nestable = ('RoundRobinPolicy',
                        'DCAwareRoundRobinPolicy',
                        'WhiteListRoundRobinPolicy',)
            if child_name in nestable:
                wrapped = CassandraHook.get_lb_policy(child_name, child_args)
                return TokenAwarePolicy(wrapped)
            # Anything else as a child degrades to plain round robin.
            return TokenAwarePolicy(RoundRobinPolicy())
示例#17
0
def get_session():
    '''
    Connect to a Cassandra cluster and return a pandas-oriented session.

    The contact point and local data center are read from the
    CASSANDRA_HOST and CASSANDRA_DC environment variables (a missing
    variable raises KeyError).
    :return: A Cassandra session whose rows are returned as a DataFrame.
    '''
    # cluster information comes from Docker-provided environment variables
    contact_point = os.environ['CASSANDRA_HOST']
    local_dc = os.environ['CASSANDRA_DC']

    # restrict load balancing to the configured data center
    dc_policy = DCAwareRoundRobinPolicy(local_dc=local_dc)
    cluster = Cluster([contact_point], load_balancing_policy=dc_policy)

    # use the faster event loop provider
    cluster.connection_class = LibevConnection

    session = cluster.connect()

    def pandas_factory(colnames, rows):
        # Row factory: hand the whole result page to pandas.
        return pd.DataFrame(rows, columns=colnames)

    # Panda-centric settings: DataFrame rows, paging disabled
    session.row_factory = pandas_factory
    session.default_fetch_size = None

    return session
示例#18
0
def main():
    """
    Recreate the ``jokes`` table in keyspace ``highload`` and fill it.

    ``sys.argv[1]`` is the number of rows to insert; each row gets a random
    200-character text and a time-based UUID. When ``sys.argv[2]`` is the
    string 'True', the inserted UUIDs are written to the file
    ``inserted_uuids``, one per line.
    """
    cluster = Cluster(
        ['127.0.0.1'],
        load_balancing_policy=DCAwareRoundRobinPolicy(),
        port=9042)
    session = cluster.connect()
    session.execute('USE highload')
    session.execute('DROP TABLE IF EXISTS jokes;')
    session.execute("""
         CREATE TABLE jokes (
            id uuid PRIMARY KEY,
            text text,
            creation_timestamp timestamp,
            likes int,
            dislikes int
        )
    """)

    inserted_ids = []
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    row_count = int(sys.argv[1])
    for _ in xrange(row_count):
        joke_text = ''.join(random.choice(alphabet) for _ in xrange(200))
        joke_id = str(uuid.uuid1())
        session.execute("""
            INSERT INTO jokes (id, text, creation_timestamp, likes, dislikes)
                VALUES({}, '{}', toTimestamp(now()), 10, 20) IF NOT EXISTS;
        """.format(joke_id, joke_text))
        inserted_ids.append(joke_id)

    dump_requested = len(sys.argv) >= 3 and sys.argv[2] == 'True'
    if dump_requested:
        with open('inserted_uuids', 'w') as f:
            f.writelines(joke_id + '\n' for joke_id in inserted_ids)
示例#19
0
def init():
    """
    One-time setup of the Cassandra and Elasticsearch connections.

    Guarded by the module-level ``already_loaded`` flag, so repeated calls
    are no-ops. Sets the module globals ``_cql`` (cqlengine session) and
    ``_es`` (Elasticsearch client), creates the keyspace, syncs the
    ``Article`` table and, if the index named after the keyspace does not
    exist yet, creates it from ``article-mapping.json``.
    """
    global already_loaded, _cql, _es
    if already_loaded:
        return

    connection.setup(
        ["localhost"],
        default_keyspace=keyspace,
        protocol_version=3,
        load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC1'),
        retry_connect=True)
    _cql = connection.get_session()

    management.create_keyspace_network_topology(keyspace, {'DC1': 1})
    management.sync_table(Article, keyspaces=[keyspace])

    _es = Elasticsearch(["localhost"],
                        scheme="http",
                        port=9200,
                        sniff_on_start=False,
                        sniff_on_connection_fail=True)

    if not _es.indices.exists(index=keyspace):
        print("PUT ES mapping")
        # Read the mapping inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open('article-mapping.json') as mapping_file:
            mapping = json.load(mapping_file)
        _es.indices.create(keyspace, mapping)

    already_loaded = True
示例#20
0
    def init(self, config):
        """
        Initialize the DOMS Cassandra schema.

        Reads ``webservice/algorithms/doms/domsconfig.ini``, logs the
        Cassandra settings, connects with a token-aware / DC-aware policy,
        creates the keyspace and tables, and then shuts the cluster down.

        :param config: not used by this method body.
        """
        log = logging.getLogger(__name__)
        log.info("*** STARTING DOMS INITIALIZATION ***")

        domsconfig = ConfigParser.ConfigParser()
        domsconfig.read("webservice/algorithms/doms/domsconfig.ini")

        cassHost = domsconfig.get("cassandra", "host")
        cassKeyspace = domsconfig.get("cassandra", "keyspace")
        cassDatacenter = domsconfig.get("cassandra", "local_datacenter")
        # Only logged here; it is not passed on to the Cluster.
        cassVersion = domsconfig.get("cassandra", "protocol_version")

        log.info("Cassandra Host(s): %s" % (cassHost))
        log.info("Cassandra Keyspace: %s" % (cassKeyspace))
        log.info("Cassandra Datacenter: %s" % (cassDatacenter))
        log.info("Cassandra Protocol Version: %s" % (cassVersion))

        dc_policy = DCAwareRoundRobinPolicy(cassDatacenter)
        token_policy = TokenAwarePolicy(dc_policy)

        # "host" may hold several comma-separated contact points.
        cluster = Cluster(cassHost.split(','),
                          load_balancing_policy=token_policy)
        try:
            session = cluster.connect()

            self.createKeyspace(session, cassKeyspace)
            self.createTables(session)
        finally:
            # Neither the cluster nor the session is kept after this method,
            # so release their connections and threads explicitly.
            cluster.shutdown()
示例#21
0
 def __init__(self, TESTING=None):
     """
     Read the connection settings from the environment and connect.

     :param TESTING: when None, KEYSPACE / CONTACT_POINTS are read; any
         other value selects TEST_KEYSPACE / TEST_CONTACT_POINTS. Contact
         points are a comma-separated string.

     If a required environment variable is missing, the KeyError is only
     printed and ``_cluster`` / ``_session`` are not set on the instance.
     """
     self.table = None
     # NOTE(review): profiles stays None all the way to the Cluster call below.
     profiles = None
     try:
         # Set the environment variables within your virtual environment
         # having the following names:
         # For production:
         #               KEYSPACE='key1'
         #               CONTACT_POINTS='127.0.1'
         #
         # For testing:
         #               TEST_KEYSPACE='key2'
         #               TEST_CONTACT_POINTS='127.0.1'
         # The set of environment variables used depends on whether TESTING
         # is None (production) or not (testing)
         if TESTING is None:
             self._keyspace = os.environ['KEYSPACE']
             self._contact_points = os.environ['CONTACT_POINTS'].split(",")
         else:
             self._keyspace = os.environ['TEST_KEYSPACE']
             self._contact_points = os.environ['TEST_CONTACT_POINTS'].split(
                 ",")
     except (KeyError) as err:
         print("KEY ERROR: ", err)
     else:
         # NOTE(review): this ExecutionProfile is built but never used -
         # the Cluster receives execution_profiles=None, so the
         # DCAwareRoundRobinPolicy configured here has no effect. Looks like
         # it was meant to be passed via `profiles` - confirm intent before
         # wiring it in.
         policy = ExecutionProfile(
             load_balancing_policy=DCAwareRoundRobinPolicy())
         self._cluster = Cluster(self._contact_points,
                                 execution_profiles=profiles)
         self._session = self._cluster.connect(self._keyspace)
示例#22
0
    def _create_session(self):
        """
        Create the primary cluster/session and fill the session pool.

        The primary cluster provides ``self.session`` plus one pooled
        session; each of the remaining ``SESSION_POOL_SIZE - 1`` pooled
        sessions is opened on a Cluster object of its own.
        """
        def build_cluster():
            # All clusters share the same hosts, local DC and port.
            return Cluster(
                self.hosts,
                load_balancing_policy=DCAwareRoundRobinPolicy(local_dc=DC),
                port=CASSANDRA_PORT)

        self.cluster = build_cluster()

        # create the keyspace if not exist
        self._create_keyspace()
        self.session = self.cluster.connect(self.keyspace)
        self.session_pool.append(self.cluster.connect(self.keyspace))

        # create multiple sessions
        for _ in range(SESSION_POOL_SIZE - 1):
            pooled_session = build_cluster().connect(self.keyspace)
            self.session_pool.append(pooled_session)
    def connect(self):
        """
        Connect to the configured nodes and prepare the insert statement.

        Sets ``self.cluster``, ``self.session`` and
        ``self.prep_insert_statement``; the prepared statement is given
        consistency level ANY.
        """
        _logger.debug("Connecting to %s", self.nodes_addresses)

        self.cluster = Cluster(
            self.nodes_addresses,
            load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()))
        self.session = self.cluster.connect()

        prepared = self.session.prepare(INSERT_STATEMENT)
        self.prep_insert_statement = prepared
        prepared.consistency_level = ConsistencyLevel.ANY
    def test_full_integration_with_local_cassandra(self):
        """
        Run the Athena -> AdWords offline conversion ETL against a Cassandra
        instance started through docker-compose and expect ``upload_all`` to
        return an empty list.
        """
        aws_conn = AwsConnectionSettings(region="us-east-1",
                                         secrets_manager=None,
                                         profile="default")
        execfile("../../secrets.py")

        compose = DockerCompose(filepath=os.path.dirname(base.__file__))
        with compose:
            cassandra_host = compose.get_service_host("cassandra", 9042)
            cassandra_port = int(compose.get_service_port("cassandra", 9042))

            # The local container is accessed with empty credentials.
            empty_credentials = DictKeyValueSource({
                "CASSANDRA_USERNAME": "",
                "CASSANDRA_PASSWORD": "",
            })
            cassandra_conn_setting = CassandraConnectionSettings(
                cluster_ips=[cassandra_host],
                port=cassandra_port,
                load_balancing_policy=DCAwareRoundRobinPolicy(),
                secrets_manager=CassandraSecretsManager(
                    source=empty_credentials),
            )

            verify_container_is_up(cassandra_conn_setting)
            # conn.get_session('system').execute(""" DROP TABLE test.etl_sink_record_state""")

            column_mapping = {
                'google_click_id': 'googleClickId',
                'conversion_name': 'conversionName',
                'conversion_time': 'conversionTime',
                'conversion_value': 'conversionValue',
                'conversion_currency_code': 'conversionCurrencyCode'
            }
            settings = AthenaToAdWordsOfflineConversionSettings(
                source_database=os.getenv("dummy_athena_database"),
                source_table=os.getenv("dummy_athena_table"),
                source_connection_settings=aws_conn,
                etl_identifier="test",
                destination_batch_size=100,
                etl_state_manager_connection=cassandra_conn_setting,
                etl_state_manager_keyspace="test",
                transformation_column_mapping=column_mapping,
                destination_connection_settings=GoogleAdWordsConnectionSettings(
                    client_id=os.getenv("adwords_client_id"),
                    user_agent="Tester",
                    client_customer_id=os.getenv("adwords_client_customer_id"),
                    secrets_manager=GoogleAdWordsSecretsManager()),
            )
            etl = AthenaToAdWordsOfflineConversion(settings)
            # The listing is still executed; its result is not asserted on.
            etl.list_source_files()
            #
            # self.assertListEqual(files_actual, [])

            # etl.upload_all()
            upload_result = etl.upload_all()
            self.assertListEqual(upload_result, [])
示例#25
0
    def __init__(self, signal):
        """
        Connect to the 'hdb' keyspace and start the lookups for ``signal``.

        Builds the contact point list from the two hard-coded data center
        host lists, installs an address translator for the blue/green
        network mapping, connects, and fires two asynchronous queries whose
        futures are stored in ``id_future`` and ``datatype_future``.

        :param signal: stored as ``self.att``.
        """
        self.att = signal
        self._att_id = None
        self._datatype = None
        DC_1 = [
            '172.16.2.69',
            '172.16.2.70',
            '172.16.2.71',
        ]
        DC_2 = ['172.16.2.66', '172.16.2.67', '172.16.2.68']

        # Simple Network mapper to resolve green and blue addresses
        class NetworkAdressTranslator(AddressTranslator):
            def __init__(self, addr_map=None):
                # An omitted map means "translate nothing".
                self.addr_map = {} if addr_map is None else addr_map

            def translate(self, addr):
                # Addresses without a mapping are returned unchanged rather
                # than as None, so the driver always gets a usable address.
                return self.addr_map.get(addr, addr)

        # Blue to Green topology
        addr_map = {
            # Old nodes, need ip translation
            "172.16.2.31": "10.0.107.93",
            "172.16.2.32": "10.0.107.94",
            "172.16.2.33": "10.0.107.95",
            "172.16.2.34": "10.0.107.96",
            "172.16.2.50": "10.0.107.98",
            "172.16.2.51": "10.0.107.99",
            # New nodes, IP forwarding is configured on the network
            "172.16.2.66": "172.16.2.66",
            "172.16.2.67": "172.16.2.67",
            "172.16.2.68": "172.16.2.68",
            "172.16.2.69": "172.16.2.69",
            "172.16.2.70": "172.16.2.70",
            "172.16.2.71": "172.16.2.71",
        }

        # Convert cluster points to local network hosts
        self.hdb_cluster = [addr_map[host] for host in DC_1 + DC_2]
        self.translator = NetworkAdressTranslator(addr_map)

        cluster = Cluster(
            self.hdb_cluster,
            connect_timeout=1,
            address_translator=self.translator,
            load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC1'),
        )
        self.session = cluster.connect('hdb')
        # Both queries run asynchronously; results are read from the futures.
        self.id_future = self.session.execute_async(self.conf_query)
        self.datatype_future = self.session.execute_async(self.datatype_query)
    def init(self, config):
        """
        Initialize the DOMS Cassandra schema.

        Loads ``domsconfig.ini`` (with overrides from ``config`` applied via
        ``override_config``), logs the Cassandra settings, connects with the
        configured load balancing policy and optional credentials, then
        creates or selects the keyspace and creates the tables.

        :param config: overrides handed to ``self.override_config``.
        :raises ValueError: if the ``dc_policy`` option is neither
            'DCAwareRoundRobinPolicy' nor 'WhiteListRoundRobinPolicy'.
        """
        log = logging.getLogger(__name__)
        log.info("*** STARTING DOMS INITIALIZATION ***")

        # ConfigParser replaces the SafeConfigParser alias, which was removed
        # in Python 3.12.
        domsconfig = configparser.ConfigParser()
        domsconfig.read(DomsInitializer._get_config_files('domsconfig.ini'))
        domsconfig = self.override_config(domsconfig, config)

        cassHost = domsconfig.get("cassandra", "host")
        cassPort = domsconfig.get("cassandra", "port")
        cassUsername = domsconfig.get("cassandra", "username")
        cassPassword = domsconfig.get("cassandra", "password")
        cassKeyspace = domsconfig.get("cassandra", "keyspace")
        cassDatacenter = domsconfig.get("cassandra", "local_datacenter")
        cassVersion = int(domsconfig.get("cassandra", "protocol_version"))
        cassPolicy = domsconfig.get("cassandra", "dc_policy")
        try:
            cassCreateKeyspaceGranted = domsconfig.get(
                "cassandra", "create_keyspace_granted")
        except configparser.NoOptionError:
            # Keyspace creation is attempted unless explicitly disabled.
            cassCreateKeyspaceGranted = "True"

        log.info("Cassandra Host(s): %s", cassHost)
        log.info("Cassandra Keyspace: %s", cassKeyspace)
        log.info("Cassandra Datacenter: %s", cassDatacenter)
        log.info("Cassandra Protocol Version: %s", cassVersion)
        log.info("Cassandra DC Policy: %s", cassPolicy)

        if cassPolicy == 'DCAwareRoundRobinPolicy':
            dc_policy = DCAwareRoundRobinPolicy(cassDatacenter)
            token_policy = TokenAwarePolicy(dc_policy)
        elif cassPolicy == 'WhiteListRoundRobinPolicy':
            token_policy = WhiteListRoundRobinPolicy([cassHost])
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # token_policy further down.
            raise ValueError(
                "Unsupported Cassandra dc_policy: %s" % cassPolicy)

        if cassUsername and cassPassword:
            auth_provider = PlainTextAuthProvider(username=cassUsername,
                                                  password=cassPassword)
        else:
            auth_provider = None

        # "host" may hold several comma-separated contact points.
        with Cluster(cassHost.split(','),
                     port=int(cassPort),
                     load_balancing_policy=token_policy,
                     protocol_version=cassVersion,
                     auth_provider=auth_provider) as cluster:
            session = cluster.connect()

            if cassCreateKeyspaceGranted in ["True", "true"]:
                self.createKeyspace(session, cassKeyspace)
            else:
                session.set_keyspace(cassKeyspace)

            self.createTables(session)
示例#27
0
def cassandra_cluster():
    """Build a ``Cluster`` for the hard-coded Cassandra contact points.

    The cluster routes with a ``TokenAwarePolicy`` wrapping a
    ``DCAwareRoundRobinPolicy`` whose local data center is ``'Cassandra'``,
    and uses a plain ``RetryPolicy`` as the default retry policy.

    Returns:
        The constructed ``Cluster``, or ``None`` when building it raised
        ``Error`` (the error is printed instead of propagated).
    """
    from cassandra.cluster import Cluster
    from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy, RetryPolicy

    cassandra_hosts = ['10.0.0.251', '10.0.0.250', '10.0.0.249']
    try:
        return Cluster(contact_points=cassandra_hosts,
                       load_balancing_policy=TokenAwarePolicy(
                           DCAwareRoundRobinPolicy(local_dc='Cassandra')),
                       default_retry_policy=RetryPolicy())
    # NOTE(review): ``Error`` is not defined inside this function; presumably
    # it is imported at module level -- confirm.
    except Error as er:
        # Call form of print is valid on both Python 2 and Python 3.
        print(er)
        return None
示例#28
0
    def __init__(self, config):
        """Create the cluster and open a session from ``config['cassandra']``.

        The ``cassandra`` section must provide ``username``, ``password``,
        ``hosts``, ``local_dc`` and ``keyspace``. The resulting objects are
        stored on ``self.cluster`` and ``self.session``.
        """
        settings = config['cassandra']

        # Plain-text credentials taken straight from the settings section.
        credentials = cassandra.auth.PlainTextAuthProvider(
            username=settings['username'],
            password=settings['password'],
        )
        contact_points = settings['hosts']
        balancing = DCAwareRoundRobinPolicy(local_dc=settings['local_dc'])

        self.cluster = cassandra.cluster.Cluster(
            contact_points=contact_points,
            protocol_version=3,
            load_balancing_policy=balancing,
            auth_provider=credentials,
        )

        # Connecting with a keyspace name makes it the session default.
        self.session = self.cluster.connect(settings['keyspace'])
示例#29
0
def _rebuild_schema(session):
    """Drop ``KEYSPACE`` if present, recreate it, and create its tables."""
    # Check if KEYSPACE already exists and DROP it
    rows = session.execute("SELECT keyspace_name FROM system_schema.keyspaces")
    if KEYSPACE in [row[0] for row in rows]:
        log.info("Dropping existing keyspace...")
        session.execute("DROP KEYSPACE " + KEYSPACE)

    # Recreate KEYSPACE
    log.info("Creating keyspace...")
    session.execute("""
        CREATE KEYSPACE %s
        WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
    """ % KEYSPACE)

    log.info("Setting keyspace...")
    session.set_keyspace(KEYSPACE)

    # Create 'short_long' table
    session.execute("""
        CREATE TABLE short_long (
            short_url text,
            long_url text,
            PRIMARY KEY (short_url)
        )
    """)

    # Create 'long_short' table
    session.execute("""
        CREATE TABLE long_short (
            long_url text,
            short_url text,
            PRIMARY KEY (long_url)
        )
    """)

    # Create mock counter - placeholder for distributed counter
    session.execute("""
        CREATE TABLE distributed_counter (
            machine_id text PRIMARY KEY,
            increment counter
        )
    """)


def main():
    """Reset the schema on the ``cassandra`` host and shut the cluster down.

    Connects to ``cassandra:9042``, drops ``KEYSPACE`` when it already
    exists, recreates it and creates the ``short_long``, ``long_short`` and
    ``distributed_counter`` tables. The cluster is shut down in a ``finally``
    clause so a failing statement does not leave it open.
    """
    cluster = Cluster(["cassandra"],
                      load_balancing_policy=DCAwareRoundRobinPolicy(),
                      port=9042)
    try:
        _rebuild_schema(cluster.connect())
    finally:
        # Close connection to cluster, whether or not the schema work succeeded
        log.info("Shutting down cluster...")
        cluster.shutdown()
def connect_cassandra():
    """Return a ``Cluster`` configured for the hard-coded AWS contact points.

    The cluster is only constructed here; this function does not call
    ``connect()`` on it.
    """
    # AWS_VPC_US_WEST_2 (Amazon Web Services (VPC))
    nodes = ["35.165.161.176", "52.10.200.87", "35.163.204.211"]

    # your local data centre
    balancing = DCAwareRoundRobinPolicy(local_dc='AWS_VPC_US_WEST_2')
    credentials = PlainTextAuthProvider(username='******', password='******')

    return Cluster(contact_points=nodes,
                   load_balancing_policy=balancing,
                   port=9042,
                   auth_provider=credentials)
    def connect_to_cassandra(self,
                             host: str,
                             port: Union[int, str] = 9042,
                             alias: str = None,
                             keyspace: str = None,
                             username: str = None,
                             password: str = '') -> Session:
        """
        Connect to Apache Cassandra cluster.

        AllowAllAuthenticator and PasswordAuthenticator are supported as authentication backend.
        This setting should be in configuration file cassandra.yaml:
        by default:
        | authenticator: AllowAllAuthenticator
        or for password authentication:
        | authenticator: PasswordAuthenticator

        *Args:*\n
            _host_ - IP address or host name of a cluster node;\n
            _port_ - connection port;\n
            _alias_ - connection alias;\n
            _keyspace_ - the name of the keyspace that the UDT is defined in;\n
            _username_ - username to connect to cassandra;\n
            _password_ - password for username.

        *Returns:*\n
            Index of current connection.

        *Raises:*\n
            Exception - when the connection, keyspace selection or
            registration fails; the original error is chained as the cause.

        *Example:*\n
        | Connect To Cassandra  |  192.168.1.108  |  9042  |  alias=cluster1 |
        """

        logger.info('Connecting using : host={0}, port={1}, alias={2}'.format(
            host, port, alias))
        # Tracked outside the try so the handler can release it on failure.
        cluster = None
        try:
            # Credentials are only sent when a username was supplied.
            auth_provider = PlainTextAuthProvider(
                username=username, password=password) if username else None
            cluster = Cluster([host],
                              port=int(port),
                              auth_provider=auth_provider,
                              load_balancing_policy=TokenAwarePolicy(
                                  DCAwareRoundRobinPolicy()))

            session = cluster.connect()
            if keyspace is not None:
                session.set_keyspace(keyspace)
            self._connection = session
            return self._cache.register(self._connection, alias)
        except Exception as e:
            # Do not leave a half-initialised cluster (and its sessions)
            # running when the keyword fails.
            if cluster is not None:
                cluster.shutdown()
            raise Exception('Connect to Cassandra error: {0}'.format(e)) from e
示例#32
0
    def __init__(self,
                 num_classes,
                 aug,
                 table,
                 label_col,
                 data_col,
                 id_col,
                 username,
                 cass_pass,
                 cassandra_ips,
                 thread_par=32,
                 port=9042):
        """Initialise bookkeeping state, connect to Cassandra and prepare the
        per-id SELECT statement.

        The column names (``label_col``, ``data_col``, ``id_col``) and
        ``table`` are interpolated into the prepared query; ``username`` and
        ``cass_pass`` are used for plain-text authentication against
        ``cassandra_ips`` on ``port``.
        """
        # caller-supplied description of the data
        self.aug, self.num_classes = aug, num_classes
        self.label_col, self.data_col, self.id_col = label_col, data_col, id_col

        # synchronisation primitives and counters
        self.finished_event = threading.Event()
        self.lock = threading.Lock()
        self.thread_par = thread_par
        self.tot = None
        self.cow = 0
        self.onair = 0

        # accumulators, each starting as its own empty list
        self.errors, self.feats, self.labels, self.perm = [], [], [], []
        self.bb = None

        ## multi-label when num_classes is small
        self.multi_label = (num_classes <= _max_multilabs)

        ## cassandra parameters
        balancing = TokenAwarePolicy(DCAwareRoundRobinPolicy())
        dict_profile = ExecutionProfile(
            load_balancing_policy=balancing,
            row_factory=cassandra.query.dict_factory)
        credentials = PlainTextAuthProvider(username=username,
                                            password=cass_pass)
        self.cluster = Cluster(cassandra_ips,
                               execution_profiles={'dict': dict_profile},
                               protocol_version=4,
                               auth_provider=credentials,
                               port=port)
        self.cluster.connect_timeout = 10  #seconds
        self.sess = self.cluster.connect()
        self.table = table

        # The run of spaces before FROM is part of the statement text.
        select_stmt = (f"SELECT {self.label_col}, {self.data_col} "
                       f"        FROM {self.table} WHERE {self.id_col}=?")

        self.prep = self.sess.prepare(select_stmt)
示例#33
0
    def test_no_live_nodes(self):
        """
        A query plan requested after every host was marked down must be
        empty and must not raise
        """

        def local_host(address):
            # every host lives in dc1/rack1
            node = Host(address, SimpleConvictionPolicy)
            node.set_location_info("dc1", "rack1")
            return node

        hosts = [local_host(address) for address in range(4)]

        policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
        policy.populate(Mock(), hosts)

        # take the whole cluster down, one host at a time
        for node in hosts:
            policy.on_down(node)

        self.assertEqual(list(policy.make_query_plan()), [])
示例#34
0
    def test_default_dc(self):
        """
        With no explicit local_dc, the policy should take it from the host
        matching the contact point, regardless of the order hosts are added
        """
        host_local = Host(1, SimpleConvictionPolicy, 'local')
        host_remote = Host(2, SimpleConvictionPolicy, 'remote')
        host_none = Host(1, SimpleConvictionPolicy)

        # contact point is '1'
        cluster = Mock(contact_points=[1])

        def unset_policy():
            # fresh policy populated only with the DC-less host; local_dc
            # must still be unset at this point
            fresh = DCAwareRoundRobinPolicy()
            fresh.populate(cluster, [host_none])
            self.assertFalse(fresh.local_dc)
            return fresh

        # contact DC first, then contact DC second
        for arrival_order in ((host_local, host_remote),
                              (host_remote, host_local)):
            policy = unset_policy()
            for added in arrival_order:
                policy.on_add(added)
            self.assertNotEqual(policy.local_dc, host_remote.datacenter)
            self.assertEqual(policy.local_dc, host_local.datacenter)

        # no DC, then only other DC: local_dc stays unset in both cases
        for added in (host_none, host_remote):
            policy = unset_policy()
            policy.on_add(added)
            self.assertFalse(policy.local_dc)
示例#35
0
    def test_modification_during_generation(self):
        """
        Change the policy's host state (on_up/on_down) while a query plan
        generator is pending or partially consumed, and check how many hosts
        the remainder of the plan yields in each case
        """
        hosts = [Host(i, SimpleConvictionPolicy) for i in range(4)]
        for h in hosts[:2]:
            h.set_location_info("dc1", "rack1")
        for h in hosts[2:]:
            h.set_location_info("dc2", "rack1")

        policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=3)
        policy.populate(Mock(), hosts)

        # The general concept here is to change the internal state of the
        # policy during plan generation. In this case we use a grey-box
        # approach that changes specific things during known phases of the
        # generator.
        # Expected counts below are written as "<local> + <remote>".

        new_host = Host(4, SimpleConvictionPolicy)
        new_host.set_location_info("dc1", "rack1")

        # new local before iteration
        plan = policy.make_query_plan()
        policy.on_up(new_host)
        # local list is not bound yet, so we get to see that one
        self.assertEqual(len(list(plan)), 3 + 2)

        # remove local before iteration
        plan = policy.make_query_plan()
        policy.on_down(new_host)
        # local list is not bound yet, so we don't see it
        self.assertEqual(len(list(plan)), 2 + 2)

        # new local after starting iteration
        plan = policy.make_query_plan()
        next(plan)
        policy.on_up(new_host)
        # local list is bound, and one consumed, so we only see the other original
        self.assertEqual(len(list(plan)), 1 + 2)

        # remove local after traversing available
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_down(new_host)
        # we should be past the local list
        self.assertEqual(len(list(plan)), 0 + 2)

        # REMOTES CHANGE
        new_host.set_location_info("dc2", "rack1")

        # new remote after traversing local, but not starting remote
        plan = policy.make_query_plan()
        for _ in range(2):
            next(plan)
        policy.on_up(new_host)
        # list is updated before we get to it
        self.assertEqual(len(list(plan)), 0 + 3)

        # remove remote after traversing local, but not starting remote
        plan = policy.make_query_plan()
        for _ in range(2):
            next(plan)
        policy.on_down(new_host)
        # list is updated before we get to it
        self.assertEqual(len(list(plan)), 0 + 2)

        # new remote after traversing local, and starting remote
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_up(new_host)
        # slice is already made, and we've consumed one
        self.assertEqual(len(list(plan)), 0 + 1)

        # remove remote after traversing local, and starting remote
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_down(new_host)
        # slice is created with all present, and we've consumed one
        self.assertEqual(len(list(plan)), 0 + 2)

        # local DC disappears after finishing it, but not starting remote
        plan = policy.make_query_plan()
        for _ in range(2):
            next(plan)
        policy.on_down(hosts[0])
        policy.on_down(hosts[1])
        # dict traversal starts as normal
        self.assertEqual(len(list(plan)), 0 + 2)
        # restore the local hosts for the next scenario
        policy.on_up(hosts[0])
        policy.on_up(hosts[1])

        # PYTHON-297 addresses the following cases, where DCs come and go
        # during generation
        # local DC disappears after finishing it, and starting remote
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_down(hosts[0])
        policy.on_down(hosts[1])
        # dict traversal has begun and consumed one
        self.assertEqual(len(list(plan)), 0 + 1)
        # restore the local hosts for the next scenario
        policy.on_up(hosts[0])
        policy.on_up(hosts[1])

        # remote DC disappears after finishing local, but not starting remote
        plan = policy.make_query_plan()
        for _ in range(2):
            next(plan)
        policy.on_down(hosts[2])
        policy.on_down(hosts[3])
        # nothing left
        self.assertEqual(len(list(plan)), 0 + 0)
        # restore the remote hosts for the next scenario
        policy.on_up(hosts[2])
        policy.on_up(hosts[3])

        # remote DC disappears while traversing it
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_down(hosts[2])
        policy.on_down(hosts[3])
        # we continue with remainder of original list
        self.assertEqual(len(list(plan)), 0 + 1)
        # restore the remote hosts for the next scenario
        policy.on_up(hosts[2])
        policy.on_up(hosts[3])


        # two hosts in a third data center, dc3 (new_host is moved there)
        another_host = Host(5, SimpleConvictionPolicy)
        another_host.set_location_info("dc3", "rack1")
        new_host.set_location_info("dc3", "rack1")

        # new DC while traversing remote
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        policy.on_up(new_host)
        policy.on_up(another_host)
        # we continue with remainder of original list
        self.assertEqual(len(list(plan)), 0 + 1)

        # remote DC disappears after finishing it
        plan = policy.make_query_plan()
        for _ in range(3):
            next(plan)
        last_host_in_this_dc = next(plan)
        # take down whichever remote DC the plan has just finished yielding
        if last_host_in_this_dc in (new_host, another_host):
            down_hosts = [new_host, another_host]
        else:
            down_hosts = hosts[2:]
        for h in down_hosts:
            policy.on_down(h)
        # the last DC has two
        self.assertEqual(len(list(plan)), 0 + 2)
示例#36
0
    def test_status_updates(self):
        """
        Query plans must follow on_down / on_remove / on_up / on_add
        notifications for both local and remote hosts
        """
        hosts = []
        for address in range(4):
            node = Host(address, SimpleConvictionPolicy)
            # first two hosts are local (dc1), the last two remote (dc2)
            node.set_location_info("dc1" if address < 2 else "dc2", "rack1")
            hosts.append(node)

        policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=1)
        policy.populate(Mock(), hosts)
        policy.on_down(hosts[0])
        policy.on_remove(hosts[2])

        new_local_host = Host(4, SimpleConvictionPolicy)
        new_local_host.set_location_info("dc1", "rack1")
        policy.on_up(new_local_host)

        new_remote_host = Host(5, SimpleConvictionPolicy)
        new_remote_host.set_location_info("dc9000", "rack1")
        policy.on_add(new_remote_host)

        # we now have two local hosts and two remote hosts in separate dcs
        expected_local = {hosts[1], new_local_host}
        expected_remote = {hosts[3], new_remote_host}
        qplan = list(policy.make_query_plan())
        local_part, remote_part = qplan[:2], qplan[2:]
        self.assertEqual(set(local_part), expected_local)
        self.assertEqual(set(remote_part), expected_remote)

        # since we have hosts in dc9000, the distance shouldn't be IGNORED
        self.assertEqual(policy.distance(new_remote_host), HostDistance.REMOTE)

        # with every local host down only the remotes are left
        for node in (new_local_host, hosts[1]):
            policy.on_down(node)
        qplan = list(policy.make_query_plan())
        self.assertEqual(set(qplan), expected_remote)

        # with the remotes down as well the plan is empty
        for node in (new_remote_host, hosts[3]):
            policy.on_down(node)
        qplan = list(policy.make_query_plan())
        self.assertEqual(qplan, [])
示例#37
0
    def test_get_distance(self):
        """
        distance() should report LOCAL, REMOTE or IGNORED depending on the
        host's data center, used_hosts_per_remote_dc and what was populated
        """

        def host_in(address, datacenter):
            node = Host(address, SimpleConvictionPolicy)
            node.set_location_info(datacenter, "rack1")
            return node

        policy = DCAwareRoundRobinPolicy("dc1", used_hosts_per_remote_dc=0)
        host = host_in("ip1", "dc1")
        policy.populate(Mock(), [host])

        self.assertEqual(policy.distance(host), HostDistance.LOCAL)

        # used_hosts_per_remote_dc is set to 0, so ignore it
        remote_host = host_in("ip2", "dc2")
        self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

        # dc2 isn't registered in the policy's live_hosts dict
        policy.used_hosts_per_remote_dc = 1
        self.assertEqual(policy.distance(remote_host), HostDistance.IGNORED)

        # make sure the policy has both dcs registered
        policy.populate(Mock(), [host, remote_host])
        self.assertEqual(policy.distance(remote_host), HostDistance.REMOTE)

        # since used_hosts_per_remote_dc is set to 1, only the first
        # remote host in dc2 will be REMOTE, the rest are IGNORED
        second_remote_host = host_in("ip3", "dc2")
        policy.populate(Mock(), [host, remote_host, second_remote_host])
        distances = {policy.distance(remote_host),
                     policy.distance(second_remote_host)}
        self.assertEqual(distances, {HostDistance.REMOTE, HostDistance.IGNORED})
示例#38
0
    def test_with_remotes(self):
        """
        Local hosts come first in the plan, followed by at most
        used_hosts_per_remote_dc hosts from the remote data center
        """
        hosts = [Host(i, SimpleConvictionPolicy) for i in range(4)]
        for position, node in enumerate(hosts):
            # first two hosts are local (dc1), the last two remote (dc2)
            node.set_location_info("dc1" if position < 2 else "dc2", "rack1")

        local_hosts = {h for h in hosts if h.datacenter == "dc1"}
        remote_hosts = {h for h in hosts if h.datacenter != "dc1"}

        def plan_with(remote_limit):
            # fresh policy per scenario, returning its fully expanded plan
            policy = DCAwareRoundRobinPolicy(
                "dc1", used_hosts_per_remote_dc=remote_limit)
            policy.populate(Mock(), hosts)
            return list(policy.make_query_plan())

        # allow all of the remote hosts to be used
        qplan = plan_with(2)
        self.assertEqual(set(qplan[:2]), local_hosts)
        self.assertEqual(set(qplan[2:]), remote_hosts)

        # allow only one of the remote hosts to be used
        qplan = plan_with(1)
        self.assertEqual(set(qplan[:2]), local_hosts)

        used_remotes = set(qplan[2:])
        self.assertEqual(1, len(used_remotes))
        self.assertIn(qplan[2], remote_hosts)

        # allow no remote hosts to be used
        qplan = plan_with(0)
        self.assertEqual(2, len(qplan))
        self.assertEqual(local_hosts, set(qplan))