def __init__(self, client, qname, trend=5):
    """Restore buyer state (cash, holdings, quote history) from Cassandra.

    trend: number of recent quotes to consider (stored only; used elsewhere).
    """
    super(Buyer, self).__init__(client, uuid.uuid4().hex)
    self.holdings = {}    # symbol -> (number_of_shares, price, cost)
    self.cash = 100000.0  # starting bankroll when nothing is persisted
    self.history = {}     # symbol -> recent quote prices
    self.trend = trend
    self.pool = ConnectionPool('example_consumer_Buyer')
    self.stored_holdings = ColumnFamily(self.pool, 'Holdings')
    self.quote_history = ColumnFamily(self.pool, 'Quotes')
    self.stored_cash = ColumnFamily(self.pool, 'Cash')
    # Load persisted cash; seed the row on first run.
    try:
        cash = self.stored_cash.get('current')
        self.cash = cash['amount']
    except ttypes.NotFoundException:
        self.stored_cash.insert('current', {'amount': self.cash})
    for symbol, columns in self.stored_holdings.get_range():
        self.holdings[symbol] = (columns['number_of_shares'],
                                 columns['price'], columns['cost'])
    # NOTE(review): this index clause is built but never used below —
    # presumably intended to restrict the quote scan to today's rows; confirm.
    date_expression = create_index_expression('timestamp',
                                              datetime.date.today(), GT)
    date_clause = create_index_clause([date_expression], count=1000)
    # Replay every stored quote into the in-memory history.
    for key, columns in self.quote_history.get_range():
        symbol = columns['symbol']
        price = columns['price']
        self.add_quote(symbol, price)
def datatable():
    """Render the most recent ``key`` parsed log rows as a datatable page.

    Reads the newest row id via CQL (main_count.last_timeid), then fetches
    each of the preceding ``key`` rows from the pycassa 'parsed_data' CF
    through its 'logid' secondary index.
    """
    session = cassandra.connect()
    session.set_keyspace(settings['cluster_name'])
    cass_client = ConnectionPool(settings['cluster_name'],
                                 [settings['cass_cluster'] + ':' + settings['thrift_port']],
                                 timeout=60)
    col_fam = ColumnFamily(cass_client, 'parsed_data')
    session.default_timeout = 100
    # Number of rows to show; defaults to 100 when no ?key= is supplied.
    key1 = request.args.get('key')
    if key1 == None:
        key1 = 100
    key1 = int(key1)
    query = "SELECT last_timeid FROM main_count"  # users contains 100 rows
    statement = session.execute(query)
    # NOTE(review): end_key keeps the value from the LAST result row — confirm
    # main_count has exactly one row.
    for x in statement:
        end_key = x[0]
    log_data = [[] for i in range(0, key1)]
    log_data_header, k = [], 0
    temp = end_key - key1
    for i in range(temp, end_key):
        # One indexed lookup per logid; count=1 keeps only the first match.
        expr2 = create_index_expression('logid', i)
        lause = create_index_clause([expr2], count=1)
        test = list(col_fam.get_indexed_slices(lause, columns=["timestamp", "host", "byte_transfer", "request_link", "request_details", "device_type", "operating_system", "request_type", "response_code", "response_time"]))[0][1]
        for m in test.values():
            log_data[k].append(m)
        k += 1
        # Capture the column names once, from the first row fetched.
        if k == 1:
            for n in test.keys():
                log_data_header.append(n)
    return render_template('datatable.html', data=log_data,
                           data_header=log_data_header, index=key1)
class CassandraRepository(object): def __init__(self, keyspace, column_family_name): self.pool = ConnectionPool(keyspace, cassandra_settings.NODE_POOL) self.cf = ColumnFamily(self.pool, column_family_name) self.batch = {} def add_batch(self, batch, start_time=None): """ :param batch: """ self.cf.batch_insert(batch) if start_time is not None: print 'time to insert batch: %s ms' % (int(time.time() * 1000) - start_time) def get(self, timestamp): return self.cf.get(str(timestamp)) def get_range(self, start, end): return list(self.cf.get_range(start=str(start), finish=str(end))) def close(self): self.sys.close()
def get_row_key_id(domain): counter_column, counter_lock = domain_counter_map[domain] ## acquire lock before getting value of counter_lock.acquire() try: client = db_connection.get_client() cf = ColumnFamily(client, CONFIG_DOMAIN) ## get new key id id_key = cf.get(CONFIG_ROW, counter_column)[counter_column] ## increment value if not None if id_key: new_id_key = id_key + 1 cf.insert(CONFIG_ROW, {counter_column: new_id_key}, write_consistency_level=ConsistencyLevel.ALL) return id_key """ if id_key: str_id_key = str(id_key) str_id_key.zfill(MAX_PADDING_RANGE) return str_id_key else: return None """ finally: ## release lock before returning from this function counter_lock.release()
def get():
    """Print a user's event_log_mux rows merged into one newest-first stream."""
    #################### TEMP
    #userId = 'user-784b9158-5233-454e-8dcf-c229cdff12c6'
    # NOTE(review): userId/startTime are not defined in this function —
    # presumably module-level globals; confirm before running.
    print 'Getting result for userId: {0} between time {1} and {2}'.format(userId, startTime, startTime)
    con = util.getConnection()
    logCF = ColumnFamily(con, 'event_log_mux')
    # Events are multiplexed over 4 rows: "<userId>:1" .. "<userId>:4".
    rowKeys = ['{0}:{1}'.format(userId, i+1) for i in range(4)]
    rows = logCF.multiget(rowKeys)
    print 'Shows rows multiplexes into different rows each individually sorted in reverse cronological order:'
    merge = {}
    for row in rows:
        print '>> '+str(row)
        # Py2: dict.items() are lists, so ``+`` concatenates the pairs.
        merge = dict(merge.items() + rows[row].items())
        for col in rows[row]:
            colstr = rows[row][col]
            coljson = json.loads(colstr)
            print '\tInsertion Timestamp: {0}'.format(coljson['insert_time'])
    # Merge all shards and sort columns newest-first.
    final = collections.OrderedDict(sorted(merge.items(), reverse=True))
    for k,v in final.iteritems():
        coljson = json.loads(v)
        print 'insertion timestamp: {0}'.format(coljson['insert_time'])
    """
def __init__(self):
    """Open the 'jobrun' connection pool and the four job column families."""
    # FIX: removed the unused local ``err``.
    self.pool = ConnectionPool('jobrun', server_list=options['hosts'])
    self.jl = ColumnFamily(self.pool, 'job_lookup')
    self.jr = ColumnFamily(self.pool, 'job_results')
    self.jd = ColumnFamily(self.pool, 'job_dashboard')
    self.jf = ColumnFamily(self.pool, 'job_failures')
def test_packing_disabled(self):
    """Without autopacking, non-str column names/values must raise TypeError."""
    self.cf = ColumnFamily(pool, 'Standard1', autopack_names=False,
                           autopack_values=False)
    self.cf.insert('key', {'col': 'val'})
    # BUG FIX: assert_raises takes the callable's positional arguments
    # directly; passing args=(...) as a keyword meant insert() received
    # an unexpected ``args`` kwarg instead of the intended key/columns,
    # so the assertions passed for the wrong reason.
    assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
    assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
    assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
    self.cf.remove('key')
def loadData(): con = util.getConnection() cf = ColumnFamily(con, 'videos_denorm') tagCF = ColumnFamily(con, 'tag_videos_composite') movies = util.readCSV('data/movies') for movie in movies: title = movie[0] uploader = movie[1] runtime = int(movie[2]) #convert to match column validator tags = movie[3] rowKey = title+":"+uploader print "Inserting in videos: {}.".format(str(movie)) row = \ { 'title':title, 'user_name':uploader, 'runtime_in_sec':runtime, } for tag in tags.split(','): print 'adding tag: {0} for movie: {1}'.format(tag, title) row['tag:{}'.format(tag.strip().lower())] = tag.strip() print 'inserting denorm: {}'.format(row) cf.insert(rowKey, row) print 'finishished insertion.' con.dispose()
def _get_analytics_start_time(self):
    """Read the analytics start-time row.

    Returns a dict with the base start time plus per-category (flow/stat/msg)
    start times, each falling back to the base value when absent; returns
    None on lookup failure or when the base start time itself is missing.
    """
    try:
        col_family = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        row = col_family.get(SYSTEM_OBJECT_ANALYTICS)
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        return None

    if SYSTEM_OBJECT_START_TIME not in row:
        return None

    base_time = row[SYSTEM_OBJECT_START_TIME]
    ret_row = {SYSTEM_OBJECT_START_TIME: base_time}
    # Each category defaults to the base start time when not stored.
    for category_key in (SYSTEM_OBJECT_FLOW_START_TIME,
                         SYSTEM_OBJECT_STAT_START_TIME,
                         SYSTEM_OBJECT_MSG_START_TIME):
        if category_key in row:
            ret_row[category_key] = row[category_key]
        else:
            ret_row[category_key] = base_time
    return ret_row
def _update_analytics_start_time(self, start_time):
    """Persist the analytics start time into the system-object table."""
    server = '127.0.0.1:%s' % (self.__class__.cassandra_port)
    conn_pool = ConnectionPool(COLLECTOR_KEYSPACE, [server])
    system_cf = ColumnFamily(conn_pool, SYSTEM_OBJECT_TABLE)
    system_cf.insert(SYSTEM_OBJECT_ANALYTICS,
                     {SYSTEM_OBJECT_START_TIME: start_time})
def execute(self):
    """Validate self.data, then perform the delete when op_type is OP_DELETE.

    Returns (True, None) on success, (False, error) on failure.
    NOTE(review): falls through with an implicit None for other op types —
    confirm callers handle that.
    """
    ## first validate data
    data_ok, fault = self._validate_data()
    if not data_ok:
        return (False, fault)
    ## if data ok, construct InsertCommands
    if self.op_type == CassandraQuery.OP_DELETE:
        try:
            domain = self.data.domain
            row_key = self.data.get_pk()
            client = db_connection.get_client()
            cf = ColumnFamily(client, domain)
            ## if cascading is enabled, first delete all DBObject and collections comprised in this DBObject
            if self.cascade:
                pass  # TODO: cascading delete is not implemented yet
            ## lastly remove data for current element
            cf.remove(row_key)
            return (True, None)
        except Exception, ex:
            return (False, ex)
def insert(self, data, return_id=False):
    """
    Creates a new entity to represent a model.

    :param data: Model object represented by a list of (field, value)
        pairs. Each value is prepared for the insert operation.
    :param return_id: Value whether to return the id or key of newly
        created entity.
    """
    pool = self.connection
    column_family_name = get_column_family()
    col_fam = CF(pool, column_family_name)
    col_fam_data = {}
    for field, value in data.iteritems():
        col_fam_data[field] = value
    # Use the model's primary key when present, otherwise mint a UUID key.
    key = data.get(pk_column)
    if not key:
        key = str(uuid4())
    try:
        col_fam.insert(key=key,
                       columns=col_fam_data,
                       write_consistency_level=self.connection.
                       write_consistency_level)
    except Exception, e:
        # NOTE(review): errors are only printed (not re-raised) and
        # ``return_id`` is never honored — confirm callers' expectations.
        print str(e)
def load_slaves(self, o):
    """Batch-insert slave names keyed by slave id; returns how many loaded."""
    slaves_cf = ColumnFamily(self._pool, 'slaves')
    with slaves_cf.batch() as mutator:
        for sid, slave_name in o.items():
            mutator.insert(sid, {'name': slave_name})
    return len(o)
def build_from_id(self, build_id):
    """Obtain information about a build from its ID, or None if unknown."""
    builds_cf = ColumnFamily(self.pool, 'builds')
    try:
        result = builds_cf.get(build_id)
    except NotFoundException:
        return None
    return result
def colum_family_content(self, keyspace_name, column_family_name):
    """Returns content of column family of given keyspace.

    On success, caches the rows on module-level ``keyspace`` state and
    returns them; on any failure returns False, with ``keyspace.error``
    describing the problem.
    """
    keyspace.cf_result = []
    keyspace.error = "Unknown error : May be one of node in your cluster is down please check?"
    if not self.keyspace_contains(keyspace.local_system, keyspace_name, column_family_name):
        keyspace.error = "Desired Keyspace,Column Family pair could not be found."
        return False
    try:
        pool = ConnectionPool(keyspace=keyspace_name,
                              server_list=keyspace.server_ips,
                              prefill=False)
    except Exception as e:
        print e
        return False
    try:
        col_fam = ColumnFamily(pool, column_family_name)
    except Exception as e:
        print e
        return False
    result = []
    try:
        # Full-range scan; buffer_size keeps each thrift fetch small.
        tmp_result = col_fam.get_range(start='', finish='', buffer_size=10)
        for i in tmp_result:
            result.append(i)
    except Exception as e:
        print e
        return False
    # Cache the successful result for subsequent UI calls.
    keyspace.cf_result = result
    keyspace.tempks = keyspace_name
    keyspace.tempcf = column_family_name
    print result
    return result
def __init__(self, client, qname):
    """Set up the processor's connection pool and Trend column family."""
    super(Processor, self).__init__(client, qname)
    self.pool = ConnectionPool('processing_llama_Processor')
    self.trends = ColumnFamily(self.pool, 'Trend')

# NOTE(review): defined without ``self`` right after __init__ in the
# original — confirm whether this is a module-level helper or a
# (mis-declared) method.
def get_sleep_time():
    # Poll interval, in seconds.
    return 60
def _update_analytics_start_time(self, start_time):
    """Write the analytics start time — via CQL when available, else thrift."""
    if mockcassandra.use_cql():
        cluster = Cluster(['127.0.0.1'],
                          port=int(self.__class__.cassandra_port))
        session = cluster.connect(COLLECTOR_KEYSPACE_CQL)
        # The column name is double-quoted to preserve its exact case/chars.
        query = "INSERT INTO {0} (key, \"{1}\") VALUES ('{2}', {3})".format(
            SYSTEM_OBJECT_TABLE,
            SYSTEM_OBJECT_START_TIME,
            SYSTEM_OBJECT_ANALYTICS,
            start_time)
        try:
            session.execute(query)
        except Exception as e:
            logging.error("INSERT INTO %s: Key %s Column %s Value %d "
                          "FAILED: %s" % (SYSTEM_OBJECT_TABLE,
                                          SYSTEM_OBJECT_ANALYTICS,
                                          SYSTEM_OBJECT_START_TIME,
                                          start_time, str(e)))
            # Test helper: a failed write aborts the test run outright.
            assert False
        else:
            cluster.shutdown()
    else:
        # Thrift/pycassa path for clusters without CQL support.
        pool = ConnectionPool(
            COLLECTOR_KEYSPACE,
            ['127.0.0.1:%s' % (self.__class__.cassandra_port)])
        col_family = ColumnFamily(pool, SYSTEM_OBJECT_TABLE)
        col_family.insert(SYSTEM_OBJECT_ANALYTICS,
                          {SYSTEM_OBJECT_START_TIME: start_time})
def load_builders(self, o):
    """Load builder records and their secondary-index rows; returns count."""
    builders_cf = ColumnFamily(self._pool, 'builders')
    indices_cf = ColumnFamily(self._pool, 'indices')
    builder_batch = builders_cf.batch()
    index_batch = indices_cf.batch()
    for builder_id, params in o.items():
        category = params['category']
        master = unicode(params['master_id'])
        builder_batch.insert(builder_id, {
            'category': category,
            'master': master,
            'name': params['name'],
        })
        # Reverse indices: category -> builders, master -> builders.
        index_batch.insert('builder_category_to_builder_ids',
                           {category: {builder_id: ''}})
        index_batch.insert('master_id_to_slave_ids',
                           {master: {builder_id: ''}})
        slaves = params['slaves']
        if len(slaves):
            index_batch.insert('builder_id_to_slave_ids', {builder_id: {
                unicode(slave_id): '' for slave_id in slaves}})
    builder_batch.send()
    index_batch.send()
    return len(o)
def get_builder(self, builder_id):
    """Obtain info about a builder from its ID; None when it doesn't exist."""
    try:
        return ColumnFamily(self.pool, 'builders').get(builder_id)
    except NotFoundException:
        return None
def __init__(self, cls, pool, column_family, columns=None, raw_columns=False,
             **kwargs):
    """
    Maps an existing class to a column family.  Class fields become columns,
    and instances of that class can be represented as rows in standard column
    families or super columns in super column families.

    Instances of `cls` are returned from :meth:`get()`, :meth:`multiget()`,
    :meth:`get_range()` and :meth:`get_indexed_slices()`.

    `pool` is a :class:`~pycassa.pool.ConnectionPool` that will be used
    in the same way a :class:ColumnFamily uses one.

    `column_family` is the name of a column family to tie to `cls`.

    If `raw_columns` is ``True``, all columns will be fetched into the
    `raw_columns` field in requests.
    """
    ColumnFamily.__init__(self, pool, column_family, **kwargs)
    self.cls = cls
    # Field names map verbatim to column names; don't autopack them.
    self.autopack_names = False
    self.raw_columns = raw_columns
    self.dict_class = util.OrderedDict
    self.defaults = {}
    self.fields = []
    # Every CassandraType attribute declared directly on `cls` becomes a
    # mapped field, with its validator and default registered.
    for name, val_type in self.cls.__dict__.iteritems():
        if isinstance(val_type, CassandraType):
            self.fields.append(name)
            self.column_validators[name] = val_type
            self.defaults[name] = val_type.default
def __init__(self, cls, pool, column_family, raw_columns=False, **kwargs):
    """
    Instances of `cls` are returned from :meth:`get()`, :meth:`multiget()`,
    :meth:`get_range()` and :meth:`get_indexed_slices()`.

    `pool` is a :class:`~pycassa.pool.ConnectionPool` that will be used
    in the same way a :class:`~.ColumnFamily` uses one.

    `column_family` is the name of a column family to tie to `cls`.

    If `raw_columns` is ``True``, all columns will be fetched into the
    `raw_columns` field in requests.
    """
    ColumnFamily.__init__(self, pool, column_family, **kwargs)
    self.cls = cls
    # Field names map verbatim to column names; don't autopack them.
    self.autopack_names = False
    self.raw_columns = raw_columns
    self.dict_class = util.OrderedDict
    self.defaults = {}
    self.fields = []
    # inspect.getmembers also walks base classes, unlike cls.__dict__;
    # 'key' is reserved for the row key and excluded from the fields.
    for name, val_type in inspect.getmembers(self.cls):
        if name != 'key' and isinstance(val_type, CassandraType):
            self.fields.append(name)
            self.column_validators[name] = val_type
            self.defaults[name] = val_type.default
    # A CassandraType `key` attribute declares the row-key validator.
    if hasattr(self.cls, 'key') and isinstance(self.cls.key, CassandraType):
        self.key_validation_class = self.cls.key
def _check_cassandra(self, del_network_keyname, local_hostname, cassandra_listen_port):
    """Count remaining copies of a deleted network key in obj_fq_name_table."""
    from pycassa.pool import ConnectionPool
    from pycassa.columnfamily import ColumnFamily
    server = local_hostname + ":" + cassandra_listen_port
    fq_name_pool = ConnectionPool('config_db_uuid', [server])
    fq_name_cf = ColumnFamily(fq_name_pool, 'obj_fq_name_table')
    return fq_name_cf.get_count('virtual_network',
                                columns=[del_network_keyname])
def __init__(self, cls, pool, column_family, columns=None, raw_columns=False,
             **kwargs):
    """
    Maps an existing class to a column family.  Class fields become columns,
    and instances of that class can be represented as rows in standard column
    families or super columns in super column families.

    Instances of `cls` are returned from :meth:`get()`, :meth:`multiget()`,
    :meth:`get_range()` and :meth:`get_indexed_slices()`.

    `pool` is a :class:`~pycassa.pool.ConnectionPool` that will be used
    in the same way a :class:`~.ColumnFamily` uses one.

    `column_family` is the name of a column family to tie to `cls`.

    If `raw_columns` is ``True``, all columns will be fetched into the
    `raw_columns` field in requests.
    """
    ColumnFamily.__init__(self, pool, column_family, **kwargs)
    self.cls = cls
    # Field names map verbatim to column names; don't autopack them.
    self.autopack_names = False
    self.raw_columns = raw_columns
    self.dict_class = util.OrderedDict
    self.defaults = {}
    self.fields = []
    # Every CassandraType attribute declared directly on `cls` becomes a
    # mapped field, with its validator and default registered.
    for name, val_type in self.cls.__dict__.iteritems():
        if isinstance(val_type, CassandraType):
            self.fields.append(name)
            self.column_validators[name] = val_type
            self.defaults[name] = val_type.default
def _get_analytics_ttls(self):
    """Return the configured analytics TTLs.

    Reads the system-object row; each TTL falls back to its compiled-in
    default when the column is missing, and all defaults are returned when
    the row cannot be read at all.
    """
    ttl_defaults = (
        (SYSTEM_OBJECT_FLOW_DATA_TTL, AnalyticsFlowTTL),
        (SYSTEM_OBJECT_STATS_DATA_TTL, AnalyticsStatisticsTTL),
        (SYSTEM_OBJECT_CONFIG_AUDIT_TTL, AnalyticsConfigAuditTTL),
        (SYSTEM_OBJECT_GLOBAL_DATA_TTL, AnalyticsTTL),
    )
    ret_row = {}
    try:
        col_family = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        row = col_family.get(SYSTEM_OBJECT_ANALYTICS)
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        for ttl_key, default in ttl_defaults:
            ret_row[ttl_key] = default
        return ret_row
    for ttl_key, default in ttl_defaults:
        if ttl_key in row:
            ret_row[ttl_key] = row[ttl_key]
        else:
            ret_row[ttl_key] = default
    return ret_row
def columnfamily_dump(host, port, keyspace, columnfamily, columns, limit,
                      outfile, header, delimiter):
    """Dump a column family to CSV: one line per row, one field per column."""
    pool = ConnectionPool(keyspace, ['{}:{}'.format(host, port)], timeout=None)
    col_fam = ColumnFamily(pool, columnfamily)
    if columns:
        keys = set(columns.split(u','))
    else:
        # No explicit column list: sample the CF to discover all column names.
        rows = col_fam.get_range(row_count=limit)
        keys = set(key for key in itertools.chain.from_iterable(
            row[1].iterkeys() for row in rows))
    # Synthetic "<cf>_id" field carries the row key.
    keys.add(u'{}_id'.format(columnfamily))
    writer = csv.DictWriter(outfile, keys, extrasaction=u'ignore',
                            delimiter=delimiter)
    if header:
        writer.writeheader()
    rows = col_fam.get_range(columns=keys, row_count=limit)
    row_buffer_count = 0
    csv_rows = []
    for (id, data) in rows:
        d = {u'{}_id'.format(columnfamily): id}
        d.update(data)
        csv_rows.append(d)
        row_buffer_count += 1
        # Flush in batches sized to the CF read buffer.
        if row_buffer_count >= col_fam.buffer_size:
            writer.writerows(csv_rows)
            csv_rows = []
            row_buffer_count = 0
    else:
        # for/else: the loop has no break, so this always runs once at the
        # end and flushes any remaining buffered rows.
        writer.writerows(csv_rows)
def _update_analytics_start_time(self, start_times):
    """Write the supplied start-time columns; log (don't raise) on failure."""
    try:
        system_cf = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        system_cf.insert(SYSTEM_OBJECT_ANALYTICS, start_times)
    except Exception as e:
        self._logger.error("Exception: update_analytics_start_time "
                           "Connection Failure %s" % e)
def test_packing_enabled(self):
    """With autopacking on, non-str column names/values must raise TypeError."""
    self.cf = ColumnFamily(pool, 'Standard1')
    self.cf.insert('key', {'col': 'val'})
    # BUG FIX: assert_raises takes the callable's positional arguments
    # directly; passing args=(...) as a keyword meant insert() received
    # an unexpected ``args`` kwarg instead of the intended key/columns,
    # so the assertions passed for the wrong reason.
    assert_raises(TypeError, self.cf.insert, 'key', {123: 'val'})
    assert_raises(TypeError, self.cf.insert, 'key', {'col': 123})
    assert_raises(TypeError, self.cf.insert, 'key', {123: 123})
    self.cf.remove('key')
def __init__(self, keySpace):
    """Connect to the given keyspace on localhost and open the content CFs."""
    self.pool = ConnectionPool(keySpace, ['localhost:9160'])
    # One handle per content column family, bound as attributes.
    cf_map = (('col_fam_page', 'Page'),
              ('col_fam_publication', 'Publication'),
              ('col_fam_company', 'Company'),
              ('col_fam_location', 'Location'),
              ('col_fam_category', 'Category'))
    for attr_name, cf_name in cf_map:
        setattr(self, attr_name, ColumnFamily(self.pool, cf_name))
def mass_insert(pool):
    """Insert generated log lines until interrupted, then report insert rate."""
    cf_logs = ColumnFamily(pool, CF_LOGS)
    rnd_inst = random.Random()
    rnd_inst.seed(1)  # deterministic run-to-run
    start = time.time()
    count = 0
    try:
        for item in log_generator(1):
            msg = item[0]
            app = item[1]
            # Column name is a TimeUUID so columns sort by insertion time:
            # http://pycassa.github.com/pycassa/assorted/time_uuid.html
            # http://www.slideshare.net/jeremiahdjordan/pycon-2012-apache-cassandra
            # http://www.slideshare.net/rbranson/how-do-i-cassandra @ slide 80
            # https://github.com/pycassa/pycassa/issues/135
            cf_logs.insert(app, {
                uuid.uuid1(): msg,
            })
            count += 1
            if count % 100 == 0:
                logging.info("Inserted %d columns", count)
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop; fall through to the summary.
        logging.info("Stopping...")
    end = time.time()
    avg = float(count) / (end - start)
    logging.info("Avg: %f insert/sec", avg)
def get_reverse(columnFamily, key):
    "select values in a given column family, columns in reverse order; {'status': 0} on failure"
    try:
        column = ColumnFamily(pool, columnFamily)
        return column.get(key, column_reversed=True)
    except Exception:
        # FIX: narrowed the bare ``except:`` (which also swallowed
        # SystemExit/KeyboardInterrupt) to Exception, keeping the
        # error-sentinel return contract.
        return {'status': 0}
def remove_column(columnFamily, uid, columns):
    "To remove columns from a key; returns {'status': 0} on failure, None on success"
    try:
        column = ColumnFamily(pool, columnFamily)
        column.remove(uid, columns)
    except Exception:
        # FIX: narrowed the bare ``except:`` (which also swallowed
        # SystemExit/KeyboardInterrupt) to Exception, keeping the
        # error-sentinel return contract.
        return {'status': 0}
def colum_family_content(self,machine_id,keyspace_name,column_family_name): """Returns content of column family of given keyspace """ print "->>>in column family content function" pool = ConnectionPool(keyspace_name, [machine_id]) col_fam = ColumnFamily(pool, column_family_name) result = col_fam.get_range(start='', finish='') return result
def test_caching_pre_11(self):
    """Per-CF cache settings round-trip through create/alter on old clusters."""
    version = tuple(
        [int(v) for v in sys._conn.describe_version().split('.')])
    # Skip on thrift API >= 19.30.0 — per the message below, CF-specific
    # caching was dropped there (presumably the Cassandra 1.1 API; confirm).
    if version >= (19, 30, 0):
        raise SkipTest('CF specific caching no longer supported.')
    sys.create_column_family(TEST_KS, 'CachedCF10', row_cache_size=100,
                             key_cache_size=100,
                             row_cache_save_period_in_seconds=3,
                             key_cache_save_period_in_seconds=3)
    pool = ConnectionPool(TEST_KS)
    cf = ColumnFamily(pool, 'CachedCF10')
    assert_equal(cf._cfdef.row_cache_size, 100)
    assert_equal(cf._cfdef.key_cache_size, 100)
    assert_equal(cf._cfdef.row_cache_save_period_in_seconds, 3)
    assert_equal(cf._cfdef.key_cache_save_period_in_seconds, 3)
    sys.alter_column_family(TEST_KS, 'CachedCF10', row_cache_size=200,
                            key_cache_size=200,
                            row_cache_save_period_in_seconds=4,
                            key_cache_save_period_in_seconds=4)
    # A freshly-opened ColumnFamily re-reads the altered cfdef.
    cf1 = ColumnFamily(pool, 'CachedCF10')
    assert_equal(cf1._cfdef.row_cache_size, 200)
    assert_equal(cf1._cfdef.key_cache_size, 200)
    assert_equal(cf1._cfdef.row_cache_save_period_in_seconds, 4)
    assert_equal(cf1._cfdef.key_cache_save_period_in_seconds, 4)
def insert(self, data, return_id = False):
    """
    Creates a new entity to represent a model.

    :param data: Model object represented by a list of (field, value)
        pairs. Each value is prepared for the insert operation.
    :param return_id: Value whether to return the id or key of newly
        created entity.
    """
    pool = self.connection
    column_family_name = get_column_family()
    col_fam = CF(pool, column_family_name)
    col_fam_data = {}
    for field, value in data.iteritems():
        col_fam_data[field] = value
    # Use the model's primary key when present, otherwise mint a UUID key.
    key = data.get(pk_column)
    if not key:
        key = str(uuid4())
    try:
        col_fam.insert(key=key,
                       columns=col_fam_data,
                       write_consistency_level=self.connection.write_consistency_level)
    except Exception, e:
        # NOTE(review): errors are only printed (not re-raised) and
        # ``return_id`` is never honored — confirm callers' expectations.
        print str(e)
def _get_analytics_start_time(self):
    """Read the analytics start-time row, with per-category fallbacks.

    Returns None on lookup failure or when the base start time is absent;
    otherwise a dict mapping the base key plus flow/stat/msg keys, each
    defaulting to the base start time when not stored.
    """
    try:
        col_family = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        row = col_family.get(SYSTEM_OBJECT_ANALYTICS)
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        return None

    if SYSTEM_OBJECT_START_TIME not in row:
        return None

    default_start = row[SYSTEM_OBJECT_START_TIME]
    ret_row = {SYSTEM_OBJECT_START_TIME: default_start}
    for start_key in (SYSTEM_OBJECT_FLOW_START_TIME,
                      SYSTEM_OBJECT_STAT_START_TIME,
                      SYSTEM_OBJECT_MSG_START_TIME):
        ret_row[start_key] = row[start_key] if start_key in row else default_start
    return ret_row
def store_blob(self, key, content, chunk_size=DEFAULT_BLOB_CHUNK_SIZE):
    """Store ``content`` under ``key`` as numbered chunk columns + metadata.

    Returns the SHA-1 digest of the content. Also records the blob in the
    'simple_indices' CF for existence and size lookups.
    """
    cf = ColumnFamily(self.pool, 'blobs')
    # Py2 integer division: always at least one chunk. When the length is an
    # exact multiple of chunk_size, the loop below writes a trailing empty
    # chunk, which keeps this count consistent with what was written.
    chunks = len(content) / chunk_size + 1
    sha1 = hashlib.sha1()
    sha1.update(content)
    offset = 0
    i = 1
    while True:
        b = content[offset:offset + chunk_size]
        # We prefix each part with "z" so the big chunks come at the end of
        # the row and our initial read for all the metadata doesn't span
        # excessive pages on disk.
        cf.insert(key, {'z:%04d' % i: b})
        if len(b) < chunk_size:
            break
        offset += chunk_size
        i += 1
    cf.insert(key, {
        'version': 1,
        'sha1': sha1.digest(),
        'size': len(content),
        'chunk_size': chunk_size,
        'chunk_count': chunks,
    })
    # Secondary indices: blob existence and blob size.
    indices = ColumnFamily(self.pool, 'simple_indices')
    indices.insert('blobs', {key: ''})
    indices.insert('blob_size', {key: str(len(content))})
    return sha1.digest()
def slaves(self):
    """Yield (key, name) for every slave row."""
    slaves_cf = ColumnFamily(self.pool, 'slaves')
    for row_key, columns in slaves_cf.get_range(columns=['name']):
        yield row_key, columns['name']
def get_data(self, cf_name, key, start_time, end_time, output_json=False):
    """Multiget rows spanning [start_time, end_time]; optionally dump JSON.

    Missing keys are silently ignored. Column bounds are the times scaled
    to the stored microsecond-style resolution (x1000).
    """
    cf = ColumnFamily(self.pool, cf_name)
    key_range = self.gen_key_range(key, start_time, end_time)
    try:
        rows = cf.multiget(key_range,
                           column_start=start_time * 1000,
                           column_finish=end_time * 1000,
                           column_count=10000000)
        if output_json:
            self.dump_json(rows)
    except NotFoundException:
        pass
def show(query, cass):
    """Fetch the demo row from my_column_family; 404 HTTPError if absent."""
    my_column_family = ColumnFamily(cass, 'my_column_family')
    try:
        # NOTE(review): the row key is hard-coded and ``query`` is unused —
        # confirm whether the key should come from the query argument.
        result = my_column_family.get(1167864277)
        return result
    except NotFoundException, nfe:
        return HTTPError(404, 'Entity not found.')
def load_masters(self, o):
    """Batch-insert master info rows keyed by master id; returns the count."""
    masters_cf = ColumnFamily(self._pool, 'masters')
    with masters_cf.batch() as mutator:
        for master_id, master_info in o.items():
            mutator.insert(master_id, master_info)
    return len(o)
def file_metadata(self, keys):
    """Obtain metadata for stored files.

    Argument is an iterable of file keys whose data to obtain.
    """
    files_cf = ColumnFamily(self.pool, 'files')
    return files_cf.multiget(keys)
def _get_analytics_start_time_thrift(self):
    """Fetch the analytics start-time row over thrift; None on any failure."""
    try:
        system_cf = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        return system_cf.get(SYSTEM_OBJECT_ANALYTICS)
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        return None
def query(pool): logging.info("-" * 120) # ------------------------------ logging.info("-" * 120) # ------------------------------ cf_logs = ColumnFamily(pool, CF_LOGS) for obj in cf_logs.get_range(): #@UnusedVariable print "Key: {0}".format(obj[0]) # print dir(obj[1]) for k, v in obj[1].iteritems(): print " {0} -> {1}".format(k, v)
def setup_class(cls):
    """Create one standard CF per comparator type and open a handle to each."""
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'StdLong', comparator_type=LongType())
    sys.create_column_family(TEST_KS, 'StdInteger', comparator_type=IntegerType())
    sys.create_column_family(TEST_KS, 'StdBigInteger', comparator_type=IntegerType())
    sys.create_column_family(TEST_KS, 'StdTimeUUID', comparator_type=TimeUUIDType())
    sys.create_column_family(TEST_KS, 'StdLexicalUUID', comparator_type=LexicalUUIDType())
    sys.create_column_family(TEST_KS, 'StdAscii', comparator_type=AsciiType())
    sys.create_column_family(TEST_KS, 'StdUTF8', comparator_type=UTF8Type())
    sys.create_column_family(TEST_KS, 'StdBytes', comparator_type=BytesType())
    sys.create_column_family(TEST_KS, 'StdComposite',
                             comparator_type=CompositeType(LongType(), BytesType()))
    sys.close()

    cls.cf_long = ColumnFamily(pool, 'StdLong')
    cls.cf_int = ColumnFamily(pool, 'StdInteger')
    cls.cf_big_int = ColumnFamily(pool, 'StdBigInteger')
    cls.cf_time = ColumnFamily(pool, 'StdTimeUUID')
    cls.cf_lex = ColumnFamily(pool, 'StdLexicalUUID')
    cls.cf_ascii = ColumnFamily(pool, 'StdAscii')
    cls.cf_utf8 = ColumnFamily(pool, 'StdUTF8')
    cls.cf_bytes = ColumnFamily(pool, 'StdBytes')
    cls.cf_composite = ColumnFamily(pool, 'StdComposite')

    # NOTE(review): cf_big_int is deliberately (?) absent from this list —
    # confirm whether the shared teardown/iteration should include it.
    cls.cfs = [cls.cf_long, cls.cf_int, cls.cf_time, cls.cf_lex,
               cls.cf_ascii, cls.cf_utf8, cls.cf_bytes, cls.cf_composite]
def ensure_cassandra_cf(self):
    """Ensure the keyspace and column family exist, then bind the CF handle.

    FIX: the SystemManager connection was previously never closed, leaking
    a thrift connection on every call; it is now released in a finally.
    """
    s = SystemManager()
    try:
        if self.keyspace not in s.list_keyspaces():
            s.create_keyspace(self.keyspace, SIMPLE_STRATEGY,
                              {'replication_factor': '1'})
        if self.cassandra_columns_family not in s.get_keyspace_column_families(
                self.keyspace):
            s.create_column_family(self.keyspace,
                                   self.cassandra_columns_family)
    finally:
        s.close()
    self.columnfamily = ColumnFamily(self.cassandra_session,
                                     self.cassandra_columns_family)
def _get_analytics_start_time(self):
    """Return the stored analytics start time, or -1 on lookup failure."""
    try:
        system_cf = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        row = system_cf.get(SYSTEM_OBJECT_ANALYTICS,
                            columns=[SYSTEM_OBJECT_START_TIME])
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        return -1
    return row[SYSTEM_OBJECT_START_TIME]
def verify_with_thrift(self):
    """Check super-column contents over thrift (skipped on Cassandra 4+)."""
    # No more thrift in 4.0
    # NOTE(review): this is a string comparison — presumably version()
    # returns a comparable version object, else e.g. '10' < '4'; confirm.
    if self.cluster.version() >= '4':
        return
    pool = ConnectionPool("supcols", pool_size=1)
    super_col_fam = ColumnFamily(pool, "cols")
    # Each row key is a name; its single super column maps
    # ('attr', u'name') back to that same name.
    for name in NAMES:
        super_col_value = super_col_fam.get(name)
        self.assertEqual(OrderedDict([(('attr', u'name'), name)]),
                         super_col_value)
def get_data(self, cf_name, key, start_time, end_time, output_json=False):
    """Multiget rows covering [start_time, end_time]; optionally dump JSON.

    Missing keys are silently ignored; column bounds are scaled x1000 to
    match the stored column-name resolution.
    """
    target_cf = ColumnFamily(self.pool, cf_name)
    keys = self.gen_key_range(key, start_time, end_time)
    try:
        fetched = target_cf.multiget(keys,
                                     column_start=start_time * 1000,
                                     column_finish=end_time * 1000,
                                     column_count=10000000)
        if output_json:
            self.dump_json(fetched)
    except NotFoundException:
        pass
def test_alter_column_non_bytes_type(self):
    """Typed round-trips keep working after altering a column validator."""
    sys.create_column_family(TEST_KS, 'LongCF', comparator_type=LONG_TYPE)
    sys.create_index(TEST_KS, 'LongCF', 3, LONG_TYPE)
    pool = ConnectionPool(TEST_KS)
    cf = ColumnFamily(pool, 'LongCF')
    cf.insert('key', {3: 3})
    assert_equal(cf.get('key')[3], 3)
    sys.alter_column(TEST_KS, 'LongCF', 2, LONG_TYPE)
    # Re-open so the handle picks up the altered column metadata.
    cf = ColumnFamily(pool, 'LongCF')
    cf.insert('key', {2: 2})
    assert_equal(cf.get('key')[2], 2)
def simple_select(self, columnfamily, *args):
    """Print a column family (or a single row of it) as a PrettyTable.

    args[1], when present, is either a plain row key or a
    ``start:finish:count`` range expression.
    """
    slice = ['', '', self.max_rows]
    key = None
    # NOTE(review): args[1] raises IndexError when exactly one extra arg is
    # given — presumably callers always pass at least two; confirm.
    if args and args[1]:
        if ':' not in args[1]:
            key = args[1]
        for i, part in enumerate(args[1].split(':', 2)):
            slice[i] = part
    try:
        cf = ColumnFamily(self.pool, columnfamily)
    except NotFoundException:
        # Unknown CF: hand the whole input back to the shell's default handler.
        return super(CCli, self).default(' '.join([columnfamily] + list(args)))
    if key:
        # Single-row view: a two-column table of column name -> value,
        # with long values truncated for display.
        pt = PrettyTable()
        pt.field_names = ['Key', key]
        pt.align["Key"] = "l"
        pt.align[key] = 'r'
        for k, v in cf.get(key).items():
            pt.add_row([k, (v[:self.max_data_size - 3] + '...' if self.max_data_size and len(v) > self.max_data_size else v)])
        print pt.get_string(sortby='Key')
        return
    data = dict(cf.get_range(start=slice[0], finish=slice[1], row_count=int(slice[2])))
    # Union of all column names over the selected rows, sorted.
    columns = []
    for key, row in data.items():
        columns.extend(row.keys())
    columns = list(set(columns))
    columns.sort()
    pt = PrettyTable()
    pt.field_names = ['Key / Column'] + columns
    pt.align["Key / Column"] = "l"
    for column in columns:
        pt.align[column] = "r"
    for key, row in data.items():
        prow = [key]
        for column in columns:
            value = row.get(column, '---')
            # Truncate long values for display.
            if len(value) > self.max_data_size:
                value = value[:self.max_data_size - 3] + '...'
            prow.append(value)
        pt.add_row(prow)
    print pt.get_string(sortby='Key / Column')
def column_family_remove(self, machine_id, keyspace_name, column_family_name, key): """Remove a key from column family for a given keyspace """ if (self.keyspace_contains(keyspace_name, column_family_name) == False): print "Error : Keyspace:column family could not be found." return False pool = ConnectionPool(keyspace=keyspace_name, server_list=keyspace.server_ips, prefill=False) col_fam = ColumnFamily(pool, column_family_name) col_fam.remove(key) return True
def test_alter_column_family_default_validation_class(self):
    """default_validation_class is re-read by a fresh handle after an alter."""
    sys.create_column_family(TEST_KS, 'AlteredCF', default_validation_class=LONG_TYPE)
    pool = ConnectionPool(TEST_KS)
    cf = ColumnFamily(pool, 'AlteredCF')
    assert_equal(cf.default_validation_class, "LongType")
    sys.alter_column_family(TEST_KS, 'AlteredCF', default_validation_class=UTF8_TYPE)
    # A freshly-opened ColumnFamily reflects the altered validation class.
    cf = ColumnFamily(pool, 'AlteredCF')
    assert_equal(cf.default_validation_class, "UTF8Type")
def _get_analytics_ttls_thrift(self):
    """Return (ttl_row, status).

    On success the stored row is returned with status 0; on failure a
    dict of compiled-in default TTLs is returned with status -1.
    """
    try:
        system_cf = ColumnFamily(self._pool, SYSTEM_OBJECT_TABLE)
        row = system_cf.get(SYSTEM_OBJECT_ANALYTICS)
    except Exception as e:
        self._logger.error("Exception: analytics_start_time Failure %s" % e)
        fallback = {
            SYSTEM_OBJECT_FLOW_DATA_TTL: AnalyticsFlowTTL,
            SYSTEM_OBJECT_STATS_DATA_TTL: AnalyticsStatisticsTTL,
            SYSTEM_OBJECT_CONFIG_AUDIT_TTL: AnalyticsConfigAuditTTL,
            SYSTEM_OBJECT_GLOBAL_DATA_TTL: AnalyticsTTL,
        }
        return (fallback, -1)
    return (row, 0)
def initialize(self, cassandra_session):
    """Initializer of the received request.

    Args:
        cassandra_session(pycassa.pool.ConnectionPool)
    """
    self.column_family = ColumnFamily(cassandra_session, COLUMN_FAMILY)
def setup_class(cls):
    """Create super CFs (LongType supercolumns, varied subcomparators) and open handles."""
    sys = SystemManager()
    sys.create_column_family(TEST_KS, 'SuperLongSubLong', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=LongType())
    sys.create_column_family(TEST_KS, 'SuperLongSubInt', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=IntegerType())
    sys.create_column_family(TEST_KS, 'SuperLongSubBigInt', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=IntegerType())
    sys.create_column_family(TEST_KS, 'SuperLongSubTime', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=TimeUUIDType())
    sys.create_column_family(TEST_KS, 'SuperLongSubLex', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=LexicalUUIDType())
    sys.create_column_family(TEST_KS, 'SuperLongSubAscii', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=AsciiType())
    sys.create_column_family(TEST_KS, 'SuperLongSubUTF8', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=UTF8Type())
    sys.create_column_family(TEST_KS, 'SuperLongSubBytes', super=True,
                             comparator_type=LongType(),
                             subcomparator_type=BytesType())
    sys.close()

    cls.cf_suplong_sublong = ColumnFamily(pool, 'SuperLongSubLong')
    cls.cf_suplong_subint = ColumnFamily(pool, 'SuperLongSubInt')
    cls.cf_suplong_subbigint = ColumnFamily(pool, 'SuperLongSubBigInt')
    cls.cf_suplong_subtime = ColumnFamily(pool, 'SuperLongSubTime')
    cls.cf_suplong_sublex = ColumnFamily(pool, 'SuperLongSubLex')
    cls.cf_suplong_subascii = ColumnFamily(pool, 'SuperLongSubAscii')
    cls.cf_suplong_subutf8 = ColumnFamily(pool, 'SuperLongSubUTF8')
    cls.cf_suplong_subbytes = ColumnFamily(pool, 'SuperLongSubBytes')

    # BUG FIX: the list previously held cf_suplong_subint twice and omitted
    # cf_suplong_sublong entirely; each CF now appears exactly once.
    # NOTE(review): cf_suplong_subbigint remains excluded — confirm whether
    # that mirrors an intentional exclusion in the standard-CF fixture.
    cls.cfs = [cls.cf_suplong_sublong, cls.cf_suplong_subint,
               cls.cf_suplong_subtime, cls.cf_suplong_sublex,
               cls.cf_suplong_subascii, cls.cf_suplong_subutf8,
               cls.cf_suplong_subbytes]
def test_get_indexed_slices(self):
    """A birthdate secondary-index query returns every inserted row."""
    sys = SystemManager()
    for cf, keys in self.type_groups:
        sys.create_index(TEST_KS, cf.column_family, 'birthdate', LongType())
        # Re-open the CF so the handle sees the newly created index.
        cf = ColumnFamily(pool, cf.column_family)
        for key in keys:
            cf.insert(key, {'birthdate': 1})
        expr = create_index_expression('birthdate', 1)
        clause = create_index_clause([expr])
        rows = list(cf.get_indexed_slices(clause))
        assert_equal(len(rows), len(keys))
        # Every returned row must be one we inserted, with the same column.
        for k, c in rows:
            assert_true(k in keys)
            assert_equal(c, {'birthdate': 1})