Example #1
  def apply_mutations(self, mutations):
    """ Apply mutations across tables.

    Args:
      mutations: A list of dictionaries representing mutations.
    """
    prepared_statements = {'insert': {}, 'delete': {}}
    statements_and_params = []
    for mutation in mutations:
      table = mutation['table']
      if mutation['operation'] == TxnActions.PUT:
        if table not in prepared_statements['insert']:
          prepared_statements['insert'][table] = self.prepare_insert(table)
        values = mutation['values']
        for column in values:
          params = (bytearray(mutation['key']), column,
                    bytearray(values[column]))
          statements_and_params.append(
            (prepared_statements['insert'][table], params))
      elif mutation['operation'] == TxnActions.DELETE:
        if table not in prepared_statements['delete']:
          prepared_statements['delete'][table] = self.prepare_delete(table)
        params = (bytearray(mutation['key']),)
        statements_and_params.append(
          (prepared_statements['delete'][table], params))

    execute_concurrent(self.session, statements_and_params,
                       raise_on_first_error=True)
Example #2
 def execute_modify_items(self, modify_items, concurrency):
     if len(modify_items) == 0:
         return {}
     statements_and_params = []
     if ISDEBUG:
         logger.log("prepare data for cassandra")
         st = time.time()
     for item in modify_items:
         if item[0] == 'insert':
             statements_and_params.append(
                 (self.get_insert_stmt(), self.get_insert_args(item[1])))
         elif item[0] == 'delete':
             statements_and_params.append(
                 (self.get_delete_stmt(), self.get_delete_args(item[1])))
         else:
             raise ValueError('unknown modify item type')
     if ISDEBUG:
         logger.log("prepare data finished in {0} ms".format(
             (time.time() - st) * 1000))
         logger.log("start modify operation. count: {0}".format(
             len(modify_items)))
         st = time.time()
     if len(statements_and_params) == 1:
         self.session.execute(statements_and_params[0][0],
                              statements_and_params[0][1])
     else:
         execute_concurrent(self.session,
                            statements_and_params,
                            raise_on_first_error=True,
                            concurrency=concurrency)
     if ISDEBUG:
         logger.log("modify completed in {0} ms".format(
             (time.time() - st) * 1000))
Example #3
    def __store_results__(self, workflow_id, aggregations):
        if self.gold_standard:
            db = "gold_standard_penguins"
        else:
            db = "penguins"

        # try:
        #     self.cassandra_session.execute("drop table " + db)
        # except cassandra.InvalidRequest:
        #     print "table did not already exist"
        #
        # self.cassandra_session.execute("CREATE TABLE " + db + " (zooniverse_id text, aggregations text, primary key(zooniverse_id))")

        insert_statement = self.cassandra_session.prepare("""
                insert into """ + db + """ (zooniverse_id,aggregations)
                values (?,?)""")
        statements_and_params = []
        for zooniverse_id in aggregations:
            statements_and_params.append(
                (insert_statement, (zooniverse_id,
                                    json.dumps(aggregations[zooniverse_id]))))

        execute_concurrent(self.cassandra_session,
                           statements_and_params,
                           raise_on_first_error=True)
Example #4
    def test_paging_state(self):
        """
        Test to validate paging state api
        @since 3.7.0
        @jira_ticket PYTHON-200
        @expected_result the paging state returned should be accurate and allow queries to be resumed.

        @test_category queries
        """
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        list_all_results = []
        self.session.default_fetch_size = 3

        result_set = self.session.execute("SELECT * FROM test3rf.test")
        while(result_set.has_more_pages):
            for row in result_set.current_rows:
                self.assertNotIn(row, list_all_results)
            list_all_results.extend(result_set.current_rows)
            page_state = result_set.paging_state
            result_set = self.session.execute("SELECT * FROM test3rf.test", paging_state=page_state)

        if(len(result_set.current_rows) > 0):
            list_all_results.append(result_set.current_rows)
        self.assertEqual(len(list_all_results), 100)
Example #5
    def test_paging_state(self):
        """
        Test to validate paging state api
        @since 3.7.0
        @jira_ticket PYTHON-200
        @expected_result the paging state returned should be accurate and allow queries to be resumed.

        @test_category queries
        """
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        list_all_results = []
        self.session.default_fetch_size = 3

        result_set = self.session.execute("SELECT * FROM test3rf.test")
        while (result_set.has_more_pages):
            for row in result_set.current_rows:
                self.assertNotIn(row, list_all_results)
            list_all_results.extend(result_set.current_rows)
            page_state = result_set.paging_state
            result_set = self.session.execute("SELECT * FROM test3rf.test",
                                              paging_state=page_state)

        if (len(result_set.current_rows) > 0):
            list_all_results.append(result_set.current_rows)
        self.assertEqual(len(list_all_results), 100)
Example #6
def concurrent_worker(ninserts, threadnum, queue):
    # get connection
    connection = connect(SEED_NODES, keyspace=KEYSPACE, datacenter=DATACENTER)
    preparedstmt = connection.prepare("INSERT INTO tst (sernum, area, rectime) VALUES (?, ?, ?)")
    preparedstmt.consistency_level=CONSISTENCY

    inserts = 0
    total_insert_time = 0.0
    while (inserts < ninserts):
        # make a unique and incremental serial number across threads
        sernum = 'SN%05X%07X' %(threadnum, inserts)
        # make 2 to 9 inserts for this sernum
        statements_and_params = []
        for i in range(random.randint(2, 9)):
            statements_and_params.append([preparedstmt, (sernum, str(i), datetime.datetime.utcnow())])

        start_ins_time = datetime.datetime.now()
        execute_concurrent(connection, statements_and_params)
        stop_ins_time = datetime.datetime.now()
        insert_time = (stop_ins_time - start_ins_time).total_seconds()
        total_insert_time += insert_time
        inserts += len(statements_and_params)

    print('Thread %d, performed %d inserts in %f secs (%f inserts/sec)' %
          (threadnum, ninserts, total_insert_time, inserts / total_insert_time))
    connection.shutdown()
    # save all the thread specific data
    queue.put([total_insert_time, inserts, inserts / total_insert_time])
Example #7
    def test_execute_concurrent(self):
        for num_statements in (0, 1, 2, 7, 10, 99, 100, 101, 199, 200, 201):
            # write
            statement = SimpleStatement(
                "INSERT INTO test3rf.test (k, v) VALUES (%s, %s)",
                consistency_level=ConsistencyLevel.QUORUM)
            statements = cycle((statement, ))
            parameters = [(i, i) for i in range(num_statements)]

            results = execute_concurrent(self.session,
                                         list(zip(statements, parameters)))
            self.assertEqual(num_statements, len(results))
            self.assertEqual([(True, None)] * num_statements, results)

            # read
            statement = SimpleStatement(
                "SELECT v FROM test3rf.test WHERE k=%s",
                consistency_level=ConsistencyLevel.QUORUM)
            statements = cycle((statement, ))
            parameters = [(i, ) for i in range(num_statements)]

            results = execute_concurrent(self.session,
                                         list(zip(statements, parameters)))
            self.assertEqual(num_statements, len(results))
            self.assertEqual([(True, [(i, )]) for i in range(num_statements)],
                             results)
Example #8
    def batch_put_entity(self,
                         table_name,
                         row_keys,
                         column_names,
                         cell_values,
                         ttl=None):
        """
    Allows callers to store multiple rows with a single call. A row can
    have multiple columns and values associated with it. We refer to each
    row as an entity.
   
    Args: 
      table_name: The table to mutate
      row_keys: A list of keys to store on
      column_names: A list of columns to mutate
      cell_values: A dict of key/value pairs
      ttl: The number of seconds to keep the row.
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_put could not be performed due to
        an error with Cassandra.
    """
        if not isinstance(table_name, str):
            raise TypeError("Expected a str")
        if not isinstance(column_names, list):
            raise TypeError("Expected a list")
        if not isinstance(row_keys, list):
            raise TypeError("Expected a list")
        if not isinstance(cell_values, dict):
            raise TypeError("Expected a dict")

        insert_str = """
      INSERT INTO "{table}" ({key}, {column}, {value})
      VALUES (?, ?, ?)
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)

        if ttl is not None:
            insert_str += 'USING TTL {}'.format(ttl)

        statement = self.session.prepare(insert_str)

        statements_and_params = []
        for row_key in row_keys:
            for column in column_names:
                params = (bytearray(row_key), column,
                          bytearray(cell_values[row_key][column]))
                statements_and_params.append((statement, params))

        try:
            execute_concurrent(self.session,
                               statements_and_params,
                               raise_on_first_error=True)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during batch_put_entity'
            logging.exception(message)
            raise AppScaleDBConnectionError(message)
Example #9
def advance_to_next_cl_segment(session, commitlog_dir,
                               keyspace_name='ks', table_name='junk_table',
                               timeout=60, debug=True):
    """
    This is a hack to work around problems like CASSANDRA-11811.

    The problem happens in commitlog-replaying tests, like the snapshot and CDC
    tests. If we replay the first commitlog that's created, we wind up
    replaying some mutations that initialize system tables, so this function
    advances the node to the next CL by filling up the first one.
    """
    if debug:
        _debug = dtest.debug
    else:
        def _debug(*args, **kwargs):
            """
            noop debug method
            """
            pass

    session.execute(
        'CREATE TABLE {ks}.{tab} ('
        'a uuid PRIMARY KEY, b uuid, c uuid, d uuid, '
        'e uuid, f uuid, g uuid, h uuid'
        ')'.format(ks=keyspace_name, tab=table_name)
    )
    prepared_insert = session.prepare(
        'INSERT INTO {ks}.{tab} '
        '(a, b, c, d, e, f, g, h) '
        'VALUES ('
        'uuid(), uuid(), uuid(), uuid(), '
        'uuid(), uuid(), uuid(), uuid()'
        ')'.format(ks=keyspace_name, tab=table_name)
    )

    # record segments that we want to advance past
    initial_cl_files = _files_in(commitlog_dir)

    start = time.time()
    stop_time = start + timeout
    rate_limited_debug = get_rate_limited_function(_debug, 5)
    _debug('attempting to write until we start writing to new CL segments: {}'.format(initial_cl_files))

    while _files_in(commitlog_dir) <= initial_cl_files:
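        # _files_in is assumed to return a set of commitlog file names, so the
        # loop keeps writing until a segment appears that was not in the initial set.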
        elapsed = time.time() - start
        rate_limited_debug('  commitlog-advancing load step has lasted {s:.2f}s'.format(s=elapsed))
        assert_less_equal(
            time.time(), stop_time,
            "It's been over a {s}s and we haven't written a new "
            "commitlog segment. Something is wrong.".format(s=timeout)
        )
        execute_concurrent(
            session,
            ((prepared_insert, ()) for _ in range(1000)),
            concurrency=500,
            raise_on_first_error=True,
        )

    _debug('present commitlog segments: {}'.format(_files_in(commitlog_dir)))
Example #10
    def test_paging_callbacks(self):
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            future = self.session.execute_async("SELECT * FROM test3rf.test",
                                                timeout=20)

            event = Event()
            counter = count()

            def handle_page(rows, future, counter):
                for row in rows:
                    next(counter)

                if future.has_more_pages:
                    future.start_fetching_next_page()
                else:
                    event.set()

            def handle_error(err):
                event.set()
                self.fail(err)

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)

            # simple statement
            future = self.session.execute_async(
                SimpleStatement("SELECT * FROM test3rf.test"), timeout=20)
            event.clear()
            counter = count()

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)

            # prepared statement
            future = self.session.execute_async(prepared, timeout=20)
            event.clear()
            counter = count()

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)
Example #11
  def apply_mutations(self, mutations, txid):
    """ Apply mutations across tables.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
    statements_and_params = self.statements_for_mutations(mutations, txid)
    execute_concurrent(self.session, statements_and_params,
                       raise_on_first_error=True)
Example #12
    def test_async_paging_verify_writes(self):
        ddl = '''
            CREATE TABLE test3rf.test_async_paging_verify (
                k1 int,
                k2 int,
                v int,
                PRIMARY KEY(k1, k2)
            )'''
        self.session.execute(ddl)

        statements_and_params = zip(
            cycle([
                "INSERT INTO test3rf.test_async_paging_verify "
                "(k1, k2, v) VALUES (0, %s, %s)"
            ]), [(i, i + 1) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare(
            "SELECT * FROM test3rf.test_async_paging_verify")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute_async(
                "SELECT * FROM test3rf.test_async_paging_verify").result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            statement = SimpleStatement(
                "SELECT * FROM test3rf.test_async_paging_verify")
            results = self.session.execute_async(statement).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            results = self.session.execute_async(prepared).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)
Example #13
    def create_cluster(cls):

        cls.cluster_with_profiles = Cluster(protocol_version=cls.protocol_version, execution_profiles=cls.execution_profiles)

        cls.session_with_profiles = cls.cluster_with_profiles.connect(wait_for_all_pools=True)
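        # The test table is assumed to share its name with the keyspace,
        # hence the ks_name.ks_name references below.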
        statements_and_params = zip(
            cycle(["INSERT INTO  " + cls.ks_name + "." + cls.ks_name + " (k, v) VALUES (%s, 0)"]),
            [(i,) for i in range(150)])
        execute_concurrent(cls.session_with_profiles, list(statements_and_params))

        cls.select_all_statement = "SELECT * FROM {0}.{0}".format(cls.ks_name)
Example #14
    def load_warehouse_tax_data(self, csv_file, session):
        query = session.prepare(
            "INSERT INTO warehouse_tax (w_id, w_tax) VALUES(?, ?)")
        reader = csv.reader(csv_file)
        query_and_params = []
        for line in itertools.islice(reader, self.ROW_COUNT):
            query_and_params.append((query, (int(line[0]), float(line[7]))))

        execute_concurrent(session,
                           query_and_params,
                           raise_on_first_error=True)
Example #15
  def batch_put_entity(self, table_name, row_keys, column_names, cell_values,
                       ttl=None):
    """
    Allows callers to store multiple rows with a single call. A row can
    have multiple columns and values associated with it. We refer to each
    row as an entity.
   
    Args: 
      table_name: The table to mutate
      row_keys: A list of keys to store on
      column_names: A list of columns to mutate
      cell_values: A dict of key/value pairs
      ttl: The number of seconds to keep the row.
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_put could not be performed due to
        an error with Cassandra.
    """
    if not isinstance(table_name, str):
      raise TypeError("Expected a str")
    if not isinstance(column_names, list):
      raise TypeError("Expected a list")
    if not isinstance(row_keys, list):
      raise TypeError("Expected a list")
    if not isinstance(cell_values, dict):
      raise TypeError("Expected a dict")

    insert_str = """
      INSERT INTO "{table}" ({key}, {column}, {value})
      VALUES (?, ?, ?)
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)

    if ttl is not None:
      insert_str += 'USING TTL {}'.format(ttl)

    statement = self.session.prepare(insert_str)

    statements_and_params = []
    for row_key in row_keys:
      for column in column_names:
        params = (bytearray(row_key), column,
                  bytearray(cell_values[row_key][column]))
        statements_and_params.append((statement, params))

    try:
      execute_concurrent(self.session, statements_and_params,
                         raise_on_first_error=True)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during batch_put_entity'
      logging.exception(message)
      raise AppScaleDBConnectionError(message)
Example #16
def _insert_rows(session, table_name, insert_stmt, values):
    prepared_insert = session.prepare(insert_stmt)
    values = list(values)  # in case values is a generator
    execute_concurrent(session, ((prepared_insert, x) for x in values),
                       concurrency=500, raise_on_first_error=True)

    data_loaded = rows_to_list(session.execute('SELECT * FROM ' + table_name))
    logger.debug('{n} rows inserted into {table_name}'.format(n=len(data_loaded), table_name=table_name))
    # use assert_equal over assert_length_equal to avoid printing out
    # potentially large lists
    assert len(values) == len(data_loaded)
    return data_loaded
Example #17
    def load_district_next_order_id_data(self, csv_file, session):
        query = session.prepare(
            "INSERT INTO district_next_order_id (d_w_id, d_id, d_tax, d_next_o_id) "
            "VALUES(?, ?, ?, ?)")
        reader = csv.reader(csv_file)
        query_and_params = []
        for line in itertools.islice(reader, self.ROW_COUNT):
            query_and_params.append((query, (int(line[0]), int(line[1]),
                                             float(line[8]), int(line[10]))))

        execute_concurrent(session,
                           query_and_params,
                           raise_on_first_error=True)
Example #18
    def test_concurrent_with_paging(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = execute_concurrent_with_args(self.session, prepared, [None] * 10)
            self.assertEqual(10, len(results))
            for (success, result) in results:
                self.assertTrue(success)
                self.assertEqual(100, len(list(result)))
Example #19
    def test_concurrent_with_paging(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = execute_concurrent_with_args(self.session, prepared, [None] * 10)
            self.assertEqual(10, len(results))
            for (success, result) in results:
                self.assertTrue(success)
                self.assertEqual(100, len(list(result)))
Example #20
        def _execute(self, statements_and_parameters):
            """
            Execute a list of statements and parameters NOT returning data.

            :param iterable[tuple] statements_and_parameters: list of statements and parameters
            """
            size = self.CONCURRENCY
            for sub_sequence in CassandraClient.split_sequence(
                    statements_and_parameters, size):
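                # Each chunk is executed with execute_concurrent, so at most
                # CONCURRENCY statements from this client are in flight at a time.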
                c_concurrent.execute_concurrent(
                    self._session,
                    sub_sequence,
                    concurrency=size,
                )
Example #21
    def apply_mutations(self, mutations, txid):
        """ Apply mutations across tables.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
        prepared_statements = {'insert': {}, 'delete': {}}
        statements_and_params = []
        for mutation in mutations:
            table = mutation['table']

            if table == 'group_updates':
                key = mutation['key']
                insert = """
          INSERT INTO group_updates (group, last_update)
          VALUES (%(group)s, %(last_update)s)
          USING TIMESTAMP %(timestamp)s
        """
                parameters = {
                    'group': key,
                    'last_update': mutation['last_update'],
                    'timestamp': get_write_time(txid)
                }
                statements_and_params.append(
                    (SimpleStatement(insert), parameters))
                continue

            if mutation['operation'] == Operations.PUT:
                if table not in prepared_statements['insert']:
                    prepared_statements['insert'][table] = self.prepare_insert(
                        table)
                values = mutation['values']
                for column in values:
                    params = (bytearray(mutation['key']), column,
                              bytearray(values[column]), get_write_time(txid))
                    statements_and_params.append(
                        (prepared_statements['insert'][table], params))
            elif mutation['operation'] == Operations.DELETE:
                if table not in prepared_statements['delete']:
                    prepared_statements['delete'][table] = self.prepare_delete(
                        table)
                params = (get_write_time(txid), bytearray(mutation['key']))
                statements_and_params.append(
                    (prepared_statements['delete'][table], params))

        execute_concurrent(self.session,
                           statements_and_params,
                           raise_on_first_error=True)
Example #22
    def test_async_paging_verify_writes(self):
        ddl = '''
            CREATE TABLE test3rf.test_async_paging_verify (
                k1 int,
                k2 int,
                v int,
                PRIMARY KEY(k1, k2)
            )'''
        self.session.execute(ddl)

        statements_and_params = zip(cycle(["INSERT INTO test3rf.test_async_paging_verify "
                                           "(k1, k2, v) VALUES (0, %s, %s)"]),
                                    [(i, i + 1) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare("SELECT * FROM test3rf.test_async_paging_verify")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute_async("SELECT * FROM test3rf.test_async_paging_verify").result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            statement = SimpleStatement("SELECT * FROM test3rf.test_async_paging_verify")
            results = self.session.execute_async(statement).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            results = self.session.execute_async(prepared).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)
Example #23
    def test_paging(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            self.assertEqual(100, len(list(self.session.execute("SELECT * FROM test3rf.test"))))

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            self.assertEqual(100, len(list(self.session.execute(statement))))

            self.assertEqual(100, len(list(self.session.execute(prepared))))
Example #24
    def insert_points(self, metric_name, timestamps_and_values):
        """Insert points for a given metric.

        Args:
          metric_name: A graphite-like metric name (like "my.own.metric")
          timestamps_and_values: An iterable of (timestamp in seconds, values as double)
        """
        self._check_connected()
        statements_and_args = [
            self._make_insert_points(metric_name, t, v)
            for t, v in timestamps_and_values
        ]
        c_concurrent.execute_concurrent(
            self.__session, statements_and_args, concurrency=self.__concurrency,
        )
Example #25
    def test_paging(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            self.assertEqual(100, len(list(self.session.execute("SELECT * FROM test3rf.test"))))

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            self.assertEqual(100, len(list(self.session.execute(statement))))

            self.assertEqual(100, len(list(self.session.execute(prepared))))
Example #26
    def test_paging_callbacks(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            future = self.session.execute_async("SELECT * FROM test3rf.test")

            event = Event()
            counter = count()

            def handle_page(rows, future, counter):
                for row in rows:
                    next(counter)

                if future.has_more_pages:
                    future.start_fetching_next_page()
                else:
                    event.set()

            def handle_error(err):
                event.set()
                self.fail(err)

            future.add_callbacks(callback=handle_page, callback_args=(future, counter), errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)

            # simple statement
            future = self.session.execute_async(SimpleStatement("SELECT * FROM test3rf.test"))
            event.clear()
            counter = count()

            future.add_callbacks(callback=handle_page, callback_args=(future, counter), errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)

            # prepared statement
            future = self.session.execute_async(prepared)
            event.clear()
            counter = count()

            future.add_callbacks(callback=handle_page, callback_args=(future, counter), errback=handle_error)
            event.wait()
            self.assertEqual(next(counter), 100)
Example #27
def ingest_data(file, session):
    select_statement = session.prepare("""
		INSERT INTO googleapp.AppList (uid, app, category, rating, reviews, size, installs, type, price, content_rating, genres, last_updated, current_ver, android_ver)
		VALUES (now(),?,?,?,?,?,?,?,?,?,?,?,?,?)
		""")

    dataset = pd.read_csv(file)
    dataset = dataset.dropna()
    dataset.apply(lambda x: combine_data(select_statement, x), axis=1)

    logging.info('Ingestion Start: ' + file)
    start_time = datetime.datetime.now()
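    # NOTE: statements_and_params is not defined in this snippet; it is assumed
    # to be a module-level list that combine_data() appends (statement, params)
    # tuples to while iterating over the dataframe above.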
    results = execute_concurrent(session,
                                 statements_and_params,
                                 concurrency=100,
                                 raise_on_first_error=False)

    for (success, result) in results:
        if not success:
            logging.info('Ingestion Failed: ' + file)

    logging.info('Ingestion Success: ' + file)
    end_time = datetime.datetime.now()
    logging.info('For {} rows and {} columns'.format(dataset.shape[0],
                                                     dataset.shape[1]))
    logging.info('Each row takes {}'.format(
        str((end_time - start_time) / dataset.shape[0])))
    logging.info('File ingested: {}'.format(file))
    return 1
Example #28
    def __analyze__(self):
        statements_and_params = []
        select_statement = self.cassandra_session.prepare(
            "select * from penguins where zooniverse_id = ?")

        for subject_set in SubjectGenerator(self):
            for zooniverse_id in subject_set:
                statements_and_params.append(
                    (select_statement, [zooniverse_id]))

                if len(statements_and_params) == 50:
                    results = execute_concurrent(self.cassandra_session,
                                                 statements_and_params,
                                                 raise_on_first_error=False)
                    statements_and_params = []

                    for zooniverse_id2, (success,
                                         record) in zip(subject_set, results):
                        if record != []:
                            # self.__image_setup__(zooniverse_id)
                            self.__plot_image__(zooniverse_id2)

                            aggregation = json.loads(record[0].aggregations)

                            for pt_index, pt in aggregation["1"][
                                    "point"].items():
                                if pt_index in ["param", "all_users"]:
                                    continue
                                x, y = pt["center"]
                                plt.plot([x], [y], '.', color="blue")

                            plt.show()

            break
Example #29
    def test_no_connection_refused_on_timeout(self):
        """
        Test for PYTHON-91 "Connection closed after LWT timeout"
        Verifies that connection to the cluster is not shut down when timeout occurs.
        Number of iterations can be specified with LWT_ITERATIONS environment variable.
        Default value is 1000
        """
        insert_statement = self.session.prepare("INSERT INTO test3rf.lwt (k, v) VALUES (0, 0) IF NOT EXISTS")
        delete_statement = self.session.prepare("DELETE FROM test3rf.lwt WHERE k = 0 IF EXISTS")

        iterations = int(os.getenv("LWT_ITERATIONS", 1000))

        # Prepare series of parallel statements
        statements_and_params = []
        for i in range(iterations):
            statements_and_params.append((insert_statement, ()))
            statements_and_params.append((delete_statement, ()))

        received_timeout = False
        results = execute_concurrent(self.session, statements_and_params, raise_on_first_error=False)
        for (success, result) in results:
            if success:
                continue
            # In this case result is an exception
            if type(result).__name__ == "NoHostAvailable":
                self.fail("PYTHON-91: Disconnected from Cassandra: %s" % result.message)
                break
            if type(result).__name__ == "WriteTimeout":
                received_timeout = True
                continue
            self.fail("Unexpected exception %s: %s" % (type(result).__name__, result.message))
            break

        # Make sure test passed
        self.assertTrue(received_timeout)
Example #30
    def __analyze__(self):
        statements_and_params = []
        select_statement = self.cassandra_session.prepare("select * from penguins where zooniverse_id = ?")

        for subject_set in SubjectGenerator(self):
            for zooniverse_id in subject_set:
                statements_and_params.append((select_statement, [zooniverse_id]))

                if len(statements_and_params) == 50:
                    results = execute_concurrent(self.cassandra_session, statements_and_params, raise_on_first_error=False)
                    statements_and_params = []

                    for zooniverse_id2,(success,record) in zip(subject_set,results):
                        if record != []:
                            # self.__image_setup__(zooniverse_id)
                            self.__plot_image__(zooniverse_id2)

                            aggregation = json.loads(record[0].aggregations)

                            for pt_index,pt in aggregation["1"]["point"].items():
                                if pt_index in ["param","all_users"]:
                                    continue
                                x,y= pt["center"]
                                plt.plot([x],[y],'.',color="blue")

                            plt.show()

            break
Example #31
def list_entity_tags(currency, entity_id):
    # from entity id to list of tags
    session = get_session(currency, 'transformed')
    entity_group = get_id_group(entity_id)
    query = "SELECT * FROM cluster_tags WHERE cluster_group = %s and cluster" \
            " = %s"
    concurrent_query = "SELECT * FROM address_by_id_group WHERE " \
                       "address_id_group = %s and address_id = %s"

    results = session.execute(query, [entity_group, entity_id])

    # concurrent queries
    statements_and_params = []
    for row in results.current_rows:
        address_id_group = get_id_group(row.address_id)
        params = (address_id_group, row.address_id)
        statements_and_params.append((concurrent_query, params))
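    # execute_concurrent also accepts plain CQL query strings; the driver binds
    # each params tuple to the %s placeholders just as session.execute would.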
    addresses = execute_concurrent(session, statements_and_params,
                                   raise_on_first_error=False)
    id_address = dict()  # to temporary store the id-address mapping
    for (success, address) in addresses:
        if not success:
            pass
        else:
            id_address[address.one().address_id] = address.one().address
    entity_tags = []
    for row in results.current_rows:
        entity_tags.append(Tag.from_entity_row(row, id_address[row.address_id],
                                               currency).to_dict())

    return entity_tags
Example #32
        def _select(self, statements_and_parameters):
            """
            Execute a list of statements and parameters returning data.

            :param iterable[tuple] statements_and_parameters: list of statements and parameters
            :rtype: list[Row]
            :return: the rows matching the queries
            """
            ret = []
            size = self.CONCURRENCY
            for sub_sequence in CassandraClient.split_sequence(
                    statements_and_parameters, size):
                results = c_concurrent.execute_concurrent(
                    self._session,
                    sub_sequence,
                    concurrency=size,
                )
                for result in results:
                    success, rows = result
                    if success:
                        for row in rows:
                            ret.append(row)
                    else:
                        raise RuntimeError
            return ret
Example #33
    def _do_lots_of_schema_actions(self, session):
        for n in range(20):
            session.execute(
                "create table alter_me_{0} (id uuid primary key, s1 int, s2 int, s3 int, s4 int, s5 int, s6 int, s7 int);"
                .format(n))
            session.execute(
                "create table index_me_{0} (id uuid primary key, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int);"
                .format(n))

        wait(10)
        cmds = []
        for n in range(20):
            cmds.append((
                "create table new_table_{0} (id uuid primary key, c1 int, c2 int, c3 int, c4 int);"
                .format(n), ()))
            for a in range(1, 8):
                cmds.append(
                    ("alter table alter_me_{0} drop s{1};".format(n, a), ()))
                cmds.append(
                    ("alter table alter_me_{0} add c{1} int;".format(n,
                                                                     a), ()))
                cmds.append((
                    "create index ix_index_me_{0}_c{1} on index_me_{0} (c{1});"
                    .format(n, a), ()))

        results = execute_concurrent(session,
                                     cmds,
                                     concurrency=100,
                                     raise_on_first_error=True)
        for (success, result) in results:
            assert success, "didn't get success: {}".format(result)
Example #34
    def insert_and_validate_list_generator(self, reverse, slowdown):
        """
        This utility method will execute submit various statements for execution using the ConcurrentExecutorGenResults,
        then invoke a separate thread to execute the callback associated with the futures registered
        for those statements. The parameters will toggle various timing, and ordering changes.
        Finally it will validate that the results were returned in the order they were submitted
        :param reverse: Execute the callbacks in the opposite order that they were submitted
        :param slowdown: Cause intermittent queries to perform slowly
        """
        our_handler = MockResponseResponseFuture(reverse=reverse)
        mock_session = Mock()
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        mock_session.execute_async.return_value = our_handler

        t = TimedCallableInvoker(our_handler, slowdown=slowdown)
        t.start()
        try:
            results = execute_concurrent(mock_session,
                                         statements_and_params,
                                         results_generator=True)
            self.validate_result_ordering(results)
        finally:
            t.stop()
Example #35
    def test_create_lots_of_tables_concurrently(self):
        """
        create tables across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(3).start()

        node1, node2, node3 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute(
            "create keyspace lots_o_tables WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};"
        )
        session.execute("use lots_o_tables")
        wait(5)

        cmds = [(
            "create table t_{0} (id uuid primary key, c1 text, c2 text, c3 text, c4 text)"
            .format(n), ()) for n in range(250)]
        results = execute_concurrent(session,
                                     cmds,
                                     raise_on_first_error=True,
                                     concurrency=200)

        for (success, result) in results:
            assert success, "didn't get success on table create: {}".format(
                result)

        wait(10)

        session.cluster.refresh_schema_metadata()
        table_meta = session.cluster.metadata.keyspaces["lots_o_tables"].tables
        assert 250 == len(table_meta)
        self.validate_schema_consistent(node1)
        self.validate_schema_consistent(node2)
        self.validate_schema_consistent(node3)
Example #36
    def test_create_lots_of_alters_concurrently(self):
        """
        create alters across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(3).start()

        node1, node2, node3 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute("create keyspace lots_o_alters WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
        session.execute("use lots_o_alters")
        for n in range(10):
            session.execute("create table base_{0} (id uuid primary key)".format(n))
        wait(5)

        cmds = [("alter table base_{0} add c_{1} int".format(randrange(0, 10), n), ()) for n in range(500)]

        logger.debug("executing 500 alters")
        results = execute_concurrent(session, cmds, raise_on_first_error=True, concurrency=150)

        for (success, result) in results:
            assert success, "didn't get success on table create: {}".format(result)

        logger.debug("waiting for alters to propagate")
        wait(30)

        session.cluster.refresh_schema_metadata()
        table_meta = session.cluster.metadata.keyspaces["lots_o_alters"].tables
        column_ct = sum([len(table.columns) for table in list(table_meta.values())])

        # primary key + alters
        assert 510 == column_ct
        self.validate_schema_consistent(node1)
        self.validate_schema_consistent(node2)
        self.validate_schema_consistent(node3)
Example #37
    def test_create_lots_of_tables_concurrently(self):
        """
        create tables across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(3).start()

        node1, node2, node3 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute("create keyspace lots_o_tables WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
        session.execute("use lots_o_tables")
        wait(5)

        cmds = [("create table t_{0} (id uuid primary key, c1 text, c2 text, c3 text, c4 text)".format(n), ()) for n in range(250)]
        results = execute_concurrent(session, cmds, raise_on_first_error=True, concurrency=200)

        for (success, result) in results:
            assert success, "didn't get success on table create: {}".format(result)

        wait(10)

        session.cluster.refresh_schema_metadata()
        table_meta = session.cluster.metadata.keyspaces["lots_o_tables"].tables
        assert 250 == len(table_meta)
        self.validate_schema_consistent(node1)
        self.validate_schema_consistent(node2)
        self.validate_schema_consistent(node3)
Example #38
def repair_table(
    contact_points,
    auth_provider,
    ssl_opts,
    keyspace,
    table,
    partitions,
    concurrency,
    timeout,
):
    try:
        len_partitions = len(partitions)

        cluster = Cluster(
            auth_provider=auth_provider,
            compression=False,
            contact_points=contact_points,
            ssl_options=ssl_opts,
        )
        session = cluster.connect(keyspace)

        partition_key = ", ".join([
            k.name for k in
            cluster.metadata.keyspaces[keyspace].tables[table].partition_key
        ])
        print(f"{keyspace}.{table} partition key: {partition_key}")

        select_query = f"""SELECT COUNT(1) FROM \"{keyspace}\".{table}
                           WHERE token({partition_key}) >= ? AND token({partition_key}) <= ?"""
        select_token = session.prepare(select_query)
        select_token.consistency_level = ConsistencyLevel.ALL
        select_token.timeout = timeout

        stats = Counter()
        statements_and_params = []
        for start, stop in partitions:
            statements_and_params.append((select_token, (start, stop)))

        concurrent = execute_concurrent(
            session,
            statements_and_params,
            concurrency=concurrency,
            raise_on_first_error=False,
            results_generator=True,
        )
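        # results_generator=True makes execute_concurrent yield (success, result)
        # pairs lazily, so progress can be reported after each token range instead
        # of waiting for the whole run to finish.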
        for (success, result) in concurrent:
            if not success:
                stats["failed_partitions"] += 1
            else:
                for row in result.current_rows:
                    stats["repaired_rows"] += row.count
                    stats["repaired_partitions"] += 1
            print_stats(keyspace, table, len_partitions, stats)
        print_stats(keyspace, table, len_partitions, stats)
    except Exception as err:
        print(f"{keyspace}.{table} error: {err}")
        return False
    else:
        return True
Example #39
    def test_execute_concurrent(self):
        for num_statements in (0, 1, 2, 7, 10, 99, 100, 101, 199, 200, 201):
            # write
            statements = cycle(("INSERT INTO test3rf.test (k, v) VALUES (%s, %s)",))
            parameters = [(i, i) for i in range(num_statements)]

            results = execute_concurrent(self.session, zip(statements, parameters))
            self.assertEqual(num_statements, len(results))
            self.assertEqual([(True, None)] * num_statements, results)

            # read
            statements = cycle(("SELECT v FROM test3rf.test WHERE k=%s",))
            parameters = [(i,) for i in range(num_statements)]

            results = execute_concurrent(self.session, zip(statements, parameters))
            self.assertEqual(num_statements, len(results))
            self.assertEqual([(True, [(i,)]) for i in range(num_statements)], results)
Example #40
    def test_idle_heartbeat(self):
        interval = 1
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=interval)
        if PROTOCOL_VERSION < 3:
            cluster.set_core_connections_per_host(HostDistance.LOCAL, 1)
        session = cluster.connect()

        # This test relies on impl details of connection req id management to see if heartbeats 
        # are being sent. May need update if impl is changed
        connection_request_ids = {}
        for h in cluster.get_connection_holders():
            for c in h.get_connections():
                # make sure none are idle (should have startup messages)
                self.assertFalse(c.is_idle)
                with c.lock:
                    connection_request_ids[id(c)] = deque(c.request_ids)  # copy of request ids

        # let two heartbeat intervals pass (first one had startup messages in it)
        time.sleep(2 * interval + interval/10.)

        connections = [c for holders in cluster.get_connection_holders() for c in holders.get_connections()]

        # make sure requests were sent on all connections
        for c in connections:
            expected_ids = connection_request_ids[id(c)]
            expected_ids.rotate(-1)
            with c.lock:
                self.assertListEqual(list(c.request_ids), list(expected_ids))

        # assert idle status
        self.assertTrue(all(c.is_idle for c in connections))

        # send messages on all connections
        statements_and_params = [("SELECT release_version FROM system.local", ())] * len(cluster.metadata.all_hosts())
        results = execute_concurrent(session, statements_and_params)
        for success, result in results:
            self.assertTrue(success)

        # assert not idle status
        self.assertFalse(any(c.is_idle if not c.is_control_connection else False for c in connections))

        # holders include session pools and cc
        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), len(cluster.metadata.all_hosts()) + 1)  # hosts pools, 1 for cc

        # include additional sessions
        session2 = cluster.connect()

        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), 2 * len(cluster.metadata.all_hosts()) + 1)  # 2 sessions' hosts pools, 1 for cc

        cluster._idle_heartbeat.stop()
        cluster._idle_heartbeat.join()
        assert_quiescent_pool_state(self, cluster)

        cluster.shutdown()
Example #41
    def test_idle_heartbeat(self):
        interval = 1
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=interval)
        if PROTOCOL_VERSION < 3:
            cluster.set_core_connections_per_host(HostDistance.LOCAL, 1)
        session = cluster.connect()

        # This test relies on impl details of connection req id management to see if heartbeats 
        # are being sent. May need update if impl is changed
        connection_request_ids = {}
        for h in cluster.get_connection_holders():
            for c in h.get_connections():
                # make sure none are idle (should have startup messages)
                self.assertFalse(c.is_idle)
                with c.lock:
                    connection_request_ids[id(c)] = deque(c.request_ids)  # copy of request ids

        # let two heartbeat intervals pass (first one had startup messages in it)
        time.sleep(2 * interval + interval/10.)

        connections = [c for holders in cluster.get_connection_holders() for c in holders.get_connections()]

        # make sure requests were sent on all connections
        for c in connections:
            expected_ids = connection_request_ids[id(c)]
            expected_ids.rotate(-1)
            with c.lock:
                self.assertListEqual(list(c.request_ids), list(expected_ids))

        # assert idle status
        self.assertTrue(all(c.is_idle for c in connections))

        # send messages on all connections
        statements_and_params = [("SELECT release_version FROM system.local", ())] * len(cluster.metadata.all_hosts())
        results = execute_concurrent(session, statements_and_params)
        for success, result in results:
            self.assertTrue(success)

        # assert not idle status
        self.assertFalse(any(c.is_idle if not c.is_control_connection else False for c in connections))

        # holders include session pools and cc
        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), len(cluster.metadata.all_hosts()) + 1)  # hosts pools, 1 for cc

        # include additional sessions
        session2 = cluster.connect()

        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), 2 * len(cluster.metadata.all_hosts()) + 1)  # 2 sessions' hosts pools, 1 for cc

        cluster._idle_heartbeat.stop()
        cluster._idle_heartbeat.join()
        assert_quiescent_pool_state(self, cluster)

        cluster.shutdown()
Example #42
    def load_warehouse_data(self, csv_file, session):
        query = session.prepare(
            "INSERT INTO warehouse (w_id, w_name, w_street_1, w_street_2, w_city, w_state,"
            "w_zip, w_ytd) VALUES (?, ?, ?, ?, ?, ?, ?, ?)")
        reader = csv.reader(csv_file)
        query_and_params = []
        for line in itertools.islice(reader, self.ROW_COUNT):
            params = (int(line[0]), line[1], line[2], line[3], line[4],
                      line[5], line[6], float(line[8]))
            query_and_params.append((query, params))

            # Stores data needed for other methods
            w_id, w_name = line[0], line[1]
            self.map_w_name[w_id] = w_name

        execute_concurrent(session,
                           query_and_params,
                           raise_on_first_error=True)
Example #43
 def write_batch(self, metrics):
     with self._lock:
         batch_list = self._metric_batch.get_all_batches()
         results = execute_concurrent(self._session,
                                      batch_list,
                                      raise_on_first_error=True)
         self._handle_results(results)
         self._metric_batch.clear()
         LOG.info("flushed %s metrics", len(metrics))
Example #44
    def load_district_next_smallest_order_id_data(self, csv_file, session):
        query = session.prepare(
            "INSERT INTO district_next_smallest_order_id (d_w_id, d_id, d_next_smallest_o_id) "
            "VALUES (?, ?, ?)")
        reader = csv.reader(csv_file)
        query_and_params = []
        for line in itertools.islice(reader, self.ROW_COUNT):
            d_w_id, d_id = line[0], line[1]
            key = self.JOIN_CH.join((d_w_id, d_id))
            d_next_smallest_o_id = (self.map_last_delivery[key] if key
                                    in self.map_last_delivery else 0) + 1

            query_and_params.append(
                (query, (int(d_w_id), int(d_id), d_next_smallest_o_id)))

        execute_concurrent(session,
                           query_and_params,
                           raise_on_first_error=True)
Example #45
    def __cassandra_annotations__(self,workflow_id,subject_set):
        """
        get the annotations from Cassandra
        :return:
        """
        if subject_set is None:
            subject_set = self.__load_subjects__(workflow_id)
        assert isinstance(subject_set, (list, set))

        version = int(math.floor(float(self.versions[workflow_id])))

        # todo - do this better
        width = 2000
        height = 2000

        classification_tasks,marking_tasks = self.workflows[workflow_id]
        raw_classifications = {}
        raw_markings = {}

        total = 0

        # do this in bite sized pieces to avoid overwhelming DB
        for s in self.__chunks__(subject_set,15):
            statements_and_params = []

            if self.ignore_versions:
                select_statement = self.cassandra_session.prepare("select user_id,annotations,workflow_version from "+self.classification_table+" where project_id = ? and subject_id = ? and workflow_id = ?")
            else:
                select_statement = self.cassandra_session.prepare("select user_id,annotations,workflow_version from "+self.classification_table+" where project_id = ? and subject_id = ? and workflow_id = ? and workflow_version = ?")

            for subject_id in s:
                if self.ignore_versions:
                    params = (int(self.project_id),subject_id,int(workflow_id))
                else:
                    params = (int(self.project_id),subject_id,int(workflow_id),version)
                statements_and_params.append((select_statement, params))
            results = execute_concurrent(self.cassandra_session, statements_and_params, raise_on_first_error=False)

            for subject_id,(success,record_list) in zip(s,results):
                if not success:
                    print record_list
                assert success


                # seem to have the occasional "retired" subject with no classifications, not sure
                # why this is possible but if it can happen, just make a note of the subject id and skip
                if record_list == []:
                    # print "warning :: subject " + str(subject_id) + " has no classifications"
                    continue

                for ii,record in enumerate(record_list):
                    if record.user_id not in self.experts:
                        yield subject_id,record.user_id,record.annotations

        return
Example #46
def concurrent_worker(ninserts, threadnum, queue):
    # get connection
    connection = connect(SEED_NODES, keyspace=KEYSPACE, datacenter=DATACENTER)
    preparedstmt_albertsons = connection.prepare("INSERT INTO tx_details_raw (super_chain, chain, merchant, tx_time, settle_time,amount, terminal_id) VALUES (?, ?, ?, ?, ?, ?, ?)")
    preparedstmt_aldi = connection.prepare("INSERT INTO tx_details_raw (super_chain, chain, merchant, tx_time, settle_time,amount, terminal_id) VALUES (?, ?, ?, ?, ?, ?, ?)")
    preparedstmt_kroger = connection.prepare("INSERT INTO tx_details_raw (super_chain, chain, merchant, tx_time, settle_time,amount, terminal_id) VALUES (?, ?, ?, ?, ?, ?, ?)")

    preparedstmt_albertsons.consistency_level=CONSISTENCY
    preparedstmt_aldi.consistency_level=CONSISTENCY
    preparedstmt_kroger.consistency_level=CONSISTENCY



    inserts = 0
    total_insert_time = 0.0
    while (inserts < ninserts):
        # candidate chains and merchants used to randomize each generated transaction
        chain_albertsons= ['Albertsons', 'Safeway', 'Lucky', 'United']
        chain_aldi= ['Aldi', 'Traders Joes']
        chain_kroger= ['Bakers', 'Food4Less', 'Dhillon']
        merchant=['Marc', 'Ankur', 'Guy', 'Phil', 'The Pizza Guy']
        # queue 2 to 9 rounds of inserts (one row per chain each round)
        statements_and_params = []
        for i in xrange(random.randint(2,9)):
            # columns: super_chain, chain, merchant, tx_time, settle_time, amount, terminal_id
            statements_and_params.append([preparedstmt_albertsons, ('Albertsons', random.choice(chain_albertsons), random.choice(merchant), datetime.datetime.utcnow(),datetime.datetime.utcnow(), round(random.uniform(.1, 10000), 2), random.randint(1,100000000))])
            statements_and_params.append([preparedstmt_aldi, ('Aldi', random.choice(chain_aldi), random.choice(merchant), datetime.datetime.utcnow(),
                                                         datetime.datetime.utcnow(), round(random.uniform(.1, 10000), 2), random.randint(1,100000000))])
            statements_and_params.append([preparedstmt_kroger, ('Kroger', random.choice(chain_kroger), random.choice(merchant), datetime.datetime.utcnow(),
                                                         datetime.datetime.utcnow(), round(random.uniform(.1, 10000), 2), random.randint(1,100000000))])

        start_ins_time = datetime.datetime.now()
        execute_concurrent(connection, statements_and_params)
        stop_ins_time = datetime.datetime.now()
        insert_time = (stop_ins_time - start_ins_time).total_seconds()
        total_insert_time += insert_time
        inserts += len(statements_and_params)

    print 'Thread %d, performed %d inserts in %f secs (%f inserts/sec)' %(threadnum, ninserts, total_insert_time, inserts / total_insert_time)
    connection.shutdown()
    # save all the thread specific data
    queue.put([total_insert_time, inserts, inserts / total_insert_time])
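The snippet references threadnum and a shared queue, so it is presumably driven by a small harness. A hedged sketch of one possible driver; run_workers, the worker count, and the use of multiprocessing are assumptions, not part of the original:

import multiprocessing

def run_workers(nworkers, ninserts_per_worker):
    # hypothetical harness: one process per worker, per-worker stats
    # ([total_insert_time, inserts, inserts_per_sec]) collected via the queue
    queue = multiprocessing.Queue()
    workers = [multiprocessing.Process(target=concurrent_worker,
                                       args=(ninserts_per_worker, n, queue))
               for n in range(nworkers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    return [queue.get() for _ in range(nworkers)]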
Example #47
    def execute_concurrent_helper(self, session, query, results_generator=False):
        count = 0
        while count < 100:
            try:
                return execute_concurrent(session, query, results_generator=results_generator)
            except (ReadTimeout, WriteTimeout, OperationTimedOut, ReadFailure, WriteFailure):
                ex_type, ex, tb = sys.exc_info()
                log.warn("{0}: {1} Backtrace: {2}".format(ex_type.__name__, ex, traceback.extract_tb(tb)))
                del tb
                count += 1

        raise RuntimeError("Failed to execute query after 100 attempts: {0}".format(query))
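A hedged usage sketch of the helper above; the load_rows name and the statements_and_params argument are assumptions for illustration:

    def load_rows(self, session, statements_and_params):
        # hypothetical caller: transient timeouts are retried by the helper,
        # remaining per-statement errors come back as (False, exception) pairs
        results = self.execute_concurrent_helper(session, statements_and_params)
        return [result for success, result in results if success]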
Example #50
    def test_paging_verify_writes(self):
        statements_and_params = zip(cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
                                    [(i, ) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute("SELECT * FROM test3rf.test")
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            results = self.session.execute(statement)
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)

            results = self.session.execute(prepared)
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)
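Fetch size can also be set per statement rather than through session.default_fetch_size; a minimal sketch against the same table (an alternative, not part of the test above):

            # hypothetical: per-statement paging instead of session.default_fetch_size
            statement = SimpleStatement("SELECT * FROM test3rf.test", fetch_size=10)
            keys = set(result.k for result in self.session.execute(statement))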
Example #51
def main():
    row_count = 100000
    max_insert = 10

    log.info('truncate table')
    session.execute('truncate ooi.vel3d_k_wfp_instrument')
    log.info('done truncating')

    log.info('generating row data')
    rows = create_rows(row_count)
    now = time.time()
    batches = []
    batch = BatchStatement()
    for i, row in enumerate(rows):
        if (i+1) % max_insert == 0:
            batches.append((batch, []))
            batch = BatchStatement()
        batch.add(insert, row)
    batches.append((batch, []))
    log.info('inserting')
    execute_concurrent(session, batches, concurrency=50)
    log.info('%d rows: %7.2f sec elapsed', row_count, time.time()-now)
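The batches above use the driver's default logged batch type; if the generated rows land in many partitions, one option (an assumption, not something the original does) is to build them as UNLOGGED batches so the coordinator skips the batchlog:

from cassandra.query import BatchStatement, BatchType

# hypothetical tweak to the loop above: unlogged, multi-partition batches
batch = BatchStatement(batch_type=BatchType.UNLOGGED)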
Example #52
def _write_to_cdc_WriteFailure(session, insert_stmt):
    prepared = session.prepare(insert_stmt)
    start, rows_loaded, error_found = time.time(), 0, False
    rate_limited_debug = get_rate_limited_function(debug, 5)
    while not error_found:
        # We want to fail if inserting data takes too long. Locally this
        # takes about 10s, but let's be generous.
        assert_less_equal(
            (time.time() - start), 600,
            "It's taken more than 10 minutes to reach a WriteFailure trying "
            'to overrun the space designated for CDC commitlogs. This could '
            "be because data isn't being written quickly enough in this "
            'environment, or because C* is failing to reject writes when '
            'it should.'
        )

        # If we haven't logged from here in the last 5s, do so.
        rate_limited_debug(
            '  data load step has lasted {s:.2f}s, '
            'loaded {r} rows'.format(s=(time.time() - start), r=rows_loaded))

        batch_results = list(execute_concurrent(
            session,
            ((prepared, ()) for _ in range(1000)),
            concurrency=500,
            # Don't propagate errors to the main thread. We expect at least
            # one WriteFailure, so we handle it below as part of the
            # results received from this method.
            raise_on_first_error=False
        ))

        # Here, we track the number of inserted values by getting the
        # number of successfully completed statements...
        rows_loaded += len([br for br in batch_results if br[0]])
        # then, we make sure that the only failures are the expected
        # WriteFailures.
        assert_equal([],
                     [result for (success, result) in batch_results
                      if not success and not isinstance(result, WriteFailure)])
        # Finally, if we find a WriteFailure, that means we've inserted all
        # the CDC data we can and so we flip error_found to exit the loop.
        if any(isinstance(result, WriteFailure) for (_, result) in batch_results):
            debug("write failed (presumably because we've overrun "
                  'designated CDC commitlog space) after '
                  'loading {r} rows in {s:.2f}s'.format(
                      r=rows_loaded,
                      s=time.time() - start))
            error_found = True
    return rows_loaded
Example #53
    def test_no_raise_on_first_failure(self):
        statements = cycle(("INSERT INTO test3rf.test (k, v) VALUES (%s, %s)",))
        parameters = [(i, i) for i in range(100)]

        # we'll get an error back from the server
        parameters[57] = ("efefef", "awefawefawef")

        results = execute_concurrent(self.session, zip(statements, parameters), raise_on_first_error=False)
        for i, (success, result) in enumerate(results):
            if i == 57:
                self.assertFalse(success)
                self.assertIsInstance(result, InvalidRequest)
            else:
                self.assertTrue(success)
                self.assertEqual(None, result)
Example #54
    def test_no_raise_on_first_failure_client_side(self):
        statements = cycle(("INSERT INTO test3rf.test (k, v) VALUES (%s, %s)",))
        parameters = [(i, i) for i in range(100)]

        # the driver will raise an error when binding the params
        parameters[57] = 1

        results = execute_concurrent(self.session, zip(statements, parameters), raise_on_first_error=False)
        for i, (success, result) in enumerate(results):
            if i == 57:
                self.assertFalse(success)
                self.assertIsInstance(result, TypeError)
            else:
                self.assertTrue(success)
                self.assertEqual(None, result)
Example #55
    def test_create_lots_of_indexes_concurrently(self):
        """
        create indexes across multiple threads concurrently
        """
        cluster = self.cluster
        cluster.populate(2).start()

        node1, node2 = cluster.nodelist()
        session = self.cql_connection(node1)
        session.execute("create keyspace lots_o_indexes WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};")
        session.execute("use lots_o_indexes")
        for n in range(5):
            session.execute("create table base_{0} (id uuid primary key, c1 int, c2 int)".format(n))
            for ins in range(1000):
                session.execute("insert into base_{0} (id, c1, c2) values (uuid(), {1}, {2})".format(n, ins, ins))
        wait(5)

        logger.debug("creating indexes")
        cmds = []
        for n in range(5):
            cmds.append(("create index ix_base_{0}_c1 on base_{0} (c1)".format(n), ()))
            cmds.append(("create index ix_base_{0}_c2 on base_{0} (c2)".format(n), ()))

        results = execute_concurrent(session, cmds, raise_on_first_error=True)

        for (success, result) in results:
            assert success, "didn't get success on table create: {}".format(result)

        wait(5)

        logger.debug("validating schema and index list")
        session.cluster.control_connection.wait_for_schema_agreement()
        session.cluster.refresh_schema_metadata()
        index_meta = session.cluster.metadata.keyspaces["lots_o_indexes"].indexes
        self.validate_schema_consistent(node1)
        self.validate_schema_consistent(node2)
        assert 10 == len(index_meta)
        for n in range(5):
            assert "ix_base_{0}_c1".format(n) in index_meta
            assert "ix_base_{0}_c2".format(n) in index_meta

        logger.debug("waiting for indexes to fill in")
        wait(45)
        logger.debug("querying all values by secondary index")
        for n in range(5):
            for ins in range(1000):
                assert 1 == len(list(session.execute("select * from base_{0} where c1 = {1}".format(n, ins))))
                assert 1 == len(list(session.execute("select * from base_{0} where c2 = {1}".format(n, ))))
Example #56
    def _do_lots_of_schema_actions(self, session):
        for n in range(20):
            session.execute("create table alter_me_{0} (id uuid primary key, s1 int, s2 int, s3 int, s4 int, s5 int, s6 int, s7 int);".format(n))
            session.execute("create table index_me_{0} (id uuid primary key, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int);".format(n))

        wait(10)
        cmds = []
        for n in range(20):
            cmds.append(("create table new_table_{0} (id uuid primary key, c1 int, c2 int, c3 int, c4 int);".format(n), ()))
            for a in range(1, 8):
                cmds.append(("alter table alter_me_{0} drop s{1};".format(n, a), ()))
                cmds.append(("alter table alter_me_{0} add c{1} int;".format(n, a), ()))
                cmds.append(("create index ix_index_me_{0}_c{1} on index_me_{0} (c{1});".format(n, a), ()))

        results = execute_concurrent(session, cmds, concurrency=100, raise_on_first_error=True)
        for (success, result) in results:
            assert success, "didn't get success: {}".format(result)