def store(self, column_names, fields, raw_data_rows):
    get_timestamp = operator.itemgetter(1)

    for timestamp, grouped_rows in grouped_by(raw_data_rows, get_timestamp):
        rows = [
            (dn, values)
            for dn, _, values in grouped_rows
        ]

        entity_ref = EntityDnRef(rows[0][0])

        with closing(self.conn.cursor()) as cursor:
            datasource = DataSource.from_name(cursor, self.datasource)

            entitytype = entity_ref.get_entitytype(cursor)

            trendstore = TrendStore.get(
                cursor, datasource, entitytype, self.granularity
            )

            if not trendstore:
                partition_size = 86400

                trendstore = TrendStore(
                    datasource, entitytype, self.granularity,
                    partition_size, "table"
                ).create(cursor)

            self.conn.commit()

        utc_timestamp = timestamp.astimezone(pytz.utc)
        utc_timestamp_str = self.offset(utc_timestamp).strftime(
            "%Y-%m-%dT%H:%M:%S")

        raw_datapackage = RawDataPackage(
            self.granularity, utc_timestamp_str, column_names, rows)

        trendstore.store_raw(raw_datapackage).run(self.conn)

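# Usage sketch for the store() method above; not part of the original
# module. The DN strings and counter values are hypothetical, and
# `importer` stands for whatever object carries store() (it needs .conn,
# .datasource, .granularity and .offset configured). The point is the
# input shape: raw_data_rows is an iterable of (dn, timestamp, values)
# triples, which grouped_by() splits per timestamp before writing.
def _example_store_usage(importer, timestamp):
    raw_data_rows = [
        ("Network=G1,Cell=10023", timestamp, ("0.9919", "2105", "17")),
        ("Network=G1,Cell=10047", timestamp, ("0.9963", "4906", "18")),
    ]

    # `fields` is accepted but unused by store() as written above.
    importer.store(["CCR", "CCRatts", "Drops"], None, raw_data_rows)
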
def test_store_copy_from_2(conn):
    trend_names = ['CCR', 'CCRatts', 'Drops']
    data_rows = [(10023, ('0.9919', '2105', '17'))]
    data_types = ['integer', 'smallint', 'smallint']

    curr_timezone = timezone("Europe/Amsterdam")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    modified = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src010")
        entitytype = name_to_entitytype(cursor, "test-type002")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table"
        ).create(cursor)
        partition = trendstore.partition(timestamp)
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

        table = partition.table()

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 1)

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)

def create_trendstore(self, datasource, entitytype, granularity):
    partition_size = PARTITION_SIZES[str(granularity)]

    trendstore = TrendStore(
        datasource, entitytype, granularity, partition_size, 'table')

    with closing(self.conn.cursor()) as cursor:
        return trendstore.create(cursor)

def test_store_copy_from(conn, dataset):
    partition_size = 86400

    with closing(conn.cursor()) as cursor:
        trendstore = TrendStore(
            dataset.datasource, dataset.entitytype, dataset.granularity,
            partition_size, "table"
        ).create(cursor)

    conn.commit()

    timestamp = dataset.datasource.tzinfo.localize(
        datetime.datetime(2013, 4, 25, 9, 45))

    trends = ["a", "b", "c"]

    def make_row(index):
        return (1234 + index, [1, 2, 3 + index])

    # Materialize the rows as a list; a lazy `map` iterator would be
    # exhausted by the first store and leave the second one empty.
    rows = [make_row(index) for index in range(100)]

    datapackage = DataPackage(dataset.granularity, timestamp, trends, rows)

    transaction = trendstore.store(datapackage)
    transaction.run(conn)

    transaction = trendstore.store(datapackage)
    transaction.run(conn)

def get_partition(cursor, datasource_name, entitytype_name, granularity,
        timestamp):
    datasource = get_datasource(cursor, datasource_name)
    entitytype = get_entitytype(cursor, entitytype_name)
    granularity = create_granularity(granularity)

    trendstore = TrendStore(datasource, entitytype, granularity)

    return trendstore.partition(timestamp)

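# Usage sketch for get_partition() above; not part of the original module.
# The data source and entity type names are hypothetical, and the
# granularity string follows the "900" (seconds) convention used by the
# other helpers here.
def _example_get_partition(conn, timestamp):
    with closing(conn.cursor()) as cursor:
        partition = get_partition(
            cursor, "test-src009", "test-type001", "900", timestamp)

        # The partition object gives access to the underlying table.
        return partition.table().name
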
def test_create_trendstore(conn, dataset):
    partition_size = 3600

    trendstore = TrendStore(
        dataset.datasource, dataset.entitytype, dataset.granularity,
        partition_size, "table")

    with closing(conn.cursor()) as cursor:
        trendstore.create(cursor)

    assert isinstance(trendstore, TrendStore)
    assert trendstore.id is not None

def test_update_and_modify_columns_fractured(conn):
    curr_timezone = timezone("Europe/Amsterdam")
    granularity = create_granularity("900")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    entity_ids = range(1023, 1023 + 100)

    trend_names_a = ["CellID", "CCR", "Drops"]
    data_rows_a = [(i, ("10023", "0.9919", "17")) for i in entity_ids]
    data_types_a = extract_data_types(data_rows_a)

    trend_names_b = ["CellID", "Drops"]
    data_rows_b = [(i, ("10023", "19")) for i in entity_ids]
    data_types_b = extract_data_types(data_rows_b)

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table"
        ).create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names_a, data_types_a)(cursor)

    conn.commit()

    store(conn, SCHEMA, table.name, trend_names_a, timestamp, data_rows_a)
    time.sleep(0.2)

    # Use a list rather than a lazy `map`; the columns are selected twice.
    check_columns = [Column("modified"), Column("Drops")]

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_before = cursor.fetchone()

    store(conn, SCHEMA, table.name, trend_names_b, timestamp, data_rows_b)

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_after = cursor.fetchone()

    assert_not_equal(row_before[0], row_after[0])
    assert_not_equal(row_before[1], row_after[1])

def test_store_copy_from_1(conn):
    trend_names = ['CellID', 'CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '2105', '17')),
        (10047, ('10047', '0.9963', '4906', '18')),
        (10048, ('10048', '0.9935', '2448', '16')),
        (10049, ('10049', '0.9939', '5271', '32')),
        (10050, ('10050', '0.9940', '3693', '22')),
        (10051, ('10051', '0.9944', '3753', '21')),
        (10052, ('10052', '0.9889', '2168', '24')),
        (10053, ('10053', '0.9920', '2372', '19')),
        (10085, ('10085', '0.9987', '2282', '3')),
        (10086, ('10086', '0.9972', '1763', '5')),
        (10087, ('10087', '0.9931', '1453', '10'))
    ]

    curr_timezone = timezone("Europe/Amsterdam")
    data_types = extract_data_types(data_rows)
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    granularity = create_granularity("900")
    modified = curr_timezone.localize(datetime.now())

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table"
        ).create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 11)

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)

def test_update_modified_column(conn):
    curr_timezone = timezone("Europe/Amsterdam")

    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    data_types = extract_data_types(data_rows)

    update_data_rows = [(10023, ('10023', '0.9919', '17'))]
    timestamp = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table"
        ).create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

        store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)
        time.sleep(1)
        store(conn, SCHEMA, table.name, trend_names, timestamp,
              update_data_rows)
        conn.commit()

        query = table.select([Column("modified")])
        query.execute(cursor)

        # Unpack the single-column rows so we compare the timestamps
        # themselves rather than one-element tuples.
        modified_list = [modified for modified, in cursor.fetchall()]

        assert_not_equal(modified_list[0], modified_list[1])

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())

        modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), table.name)
        ).execute(cursor)
        end = first(cursor.fetchone())

        eq_(end, max_modified)

def test_create_trendstore_with_children(self):
    granularity = create_granularity("900")
    partition_size = 3600

    with closing(self.conn.cursor()) as cursor:
        trendstore = TrendStore(
            self.datasource, self.entitytype, granularity, partition_size,
            "table"
        ).create(cursor)

        assert trendstore.id is not None

        timestamp = self.datasource.tzinfo.localize(
            datetime.datetime(2013, 5, 6, 14, 45))

        partition = trendstore.partition(timestamp)

        partition.create(cursor)

def test_create_trendstore_with_children(conn, dataset):
    partition_size = 3600

    trendstore = TrendStore(
        dataset.datasource, dataset.entitytype, dataset.granularity,
        partition_size, "table")

    with closing(conn.cursor()) as cursor:
        trendstore.create(cursor)

        assert trendstore.id is not None

        timestamp = dataset.datasource.tzinfo.localize(
            datetime.datetime(2013, 5, 6, 14, 45))

        partition = trendstore.partition(timestamp)

        partition.create(cursor)

def test_check_column_types(self):
    granularity = create_granularity("900")
    partition_size = 3600

    trendstore = TrendStore(
        self.datasource, self.entitytype, granularity, partition_size,
        "table")

    with closing(self.conn.cursor()) as cursor:
        trendstore.create(cursor)

        column_names = ["counter1", "counter2"]
        initial_data_types = ["smallint", "smallint"]
        data_types = ["integer", "text"]

        check_columns_exist = trendstore.check_columns_exist(
            column_names, initial_data_types)
        check_columns_exist(cursor)

        check_column_types = trendstore.check_column_types(
            column_names, data_types)
        check_column_types(cursor)

def retrieve_related_trend(conn, database_srid, region, region_srid,
        datasource, entitytype, attribute_name, granularity_str, timestamp,
        limit=None):
    granularity = create_granularity(granularity_str)

    with closing(conn.cursor()) as cursor:
        trendstore = TrendStore.get(
            cursor, datasource, entitytype, granularity)

    partition = trendstore.partition(timestamp)
    table = partition.table()
    full_base_tbl_name = table.render()

    relation_name = get_relation_name(conn, "Cell", entitytype.name)
    relation_cell_site_name = get_relation_name(conn, "Cell", "Site")

    bbox2d = transform_srid(
        set_srid(make_box_2d(region), region_srid), database_srid)

    query = (
        "SELECT r.source_id, r.target_id, base_table.\"{0}\" "
        "FROM {1} base_table "
        "JOIN relation.\"{2}\" r ON r.target_id = base_table.entity_id "
        "JOIN relation.\"{3}\" site_rel "
        "ON site_rel.source_id = r.source_id "
        "JOIN gis.site site ON site.entity_id = site_rel.target_id "
        "AND site.position && {4} "
        "WHERE base_table.\"timestamp\" = %(timestamp)s").format(
        attribute_name, full_base_tbl_name, relation_name,
        relation_cell_site_name, bbox2d)

    args = {
        "left": region["left"],
        "bottom": region["bottom"],
        "right": region["right"],
        "top": region["top"],
        "timestamp": timestamp}

    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(query, args)
        except psycopg2.ProgrammingError:
            conn.rollback()
            rows = []
        else:
            rows = cursor.fetchall()

    result = {}

    for entity_id, related_entity_id, value in rows:
        if entity_id not in result:
            result[entity_id] = {}

        result[entity_id][related_entity_id] = value

    return result

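# Usage sketch for retrieve_related_trend() above; not part of the original
# module. The region is a mapping with "left", "bottom", "right" and "top"
# keys (as read into `args` above); the SRIDs, attribute name and
# granularity below are hypothetical illustration values.
def _example_retrieve_related(conn, datasource, entitytype, timestamp):
    region = {"left": 4.8, "bottom": 52.3, "right": 5.0, "top": 52.4}

    # Returns {entity_id: {related_entity_id: value}} for cells whose
    # parent site falls inside the bounding box.
    return retrieve_related_trend(
        conn, 900913, region, 4326, datasource, entitytype, "CCR", "900",
        timestamp)
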
def get_table_names_v4(cursor, datasources, granularity, entitytype, start,
        end):
    """
    A get_table_names like function that supports both v3 and v4
    trendstores.
    """
    if isinstance(granularity, int):
        granularity = create_granularity(granularity)

    trendstores = [
        TrendStore.get(cursor, datasource, entitytype, granularity)
        for datasource in datasources
    ]

    return get_table_names(trendstores, start, end)

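# Usage sketch for get_table_names_v4() above; not part of the original
# module. It accepts the granularity either as an int (seconds) or as a
# granularity object; datasources, entitytype and the time range are
# assumed to have been resolved already, as in the other helpers here.
def _example_get_table_names(cursor, datasources, entitytype, start, end):
    # An int granularity is converted internally via create_granularity().
    return get_table_names_v4(
        cursor, datasources, 900, entitytype, start, end)
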
def test_get_trendstore(conn, dataset):
    partition_size = 3600

    with closing(conn.cursor()) as cursor:
        TrendStore(
            dataset.datasource, dataset.entitytype, dataset.granularity,
            partition_size, "table"
        ).create(cursor)

        trendstore = TrendStore.get(
            cursor, dataset.datasource, dataset.entitytype,
            dataset.granularity)

        eq_(trendstore.datasource.id, dataset.datasource.id)
        eq_(trendstore.partition_size, partition_size)
        assert trendstore.id is not None, "trendstore.id is None"
        eq_(trendstore.version, 4)

def test_get_by_id(self):
    granularity = create_granularity("900")
    partition_size = 3600
    store_type = "table"

    with closing(self.conn.cursor()) as cursor:
        t = TrendStore(
            self.datasource, self.entitytype, granularity, partition_size,
            store_type
        ).create(cursor)

        trendstore = TrendStore.get_by_id(cursor, t.id)

        eq_(trendstore.datasource.id, self.datasource.id)
        eq_(trendstore.partition_size, partition_size)
        assert trendstore.id is not None, "trendstore.id is None"
        eq_(trendstore.version, 4)

def get_or_create_trendstore(cursor, datasource, entitytype, granularity):
    trendstore = TrendStore.get(cursor, datasource, entitytype, granularity)

    if trendstore is None:
        partition_size = PARTITION_SIZES.get(granularity.name)

        if partition_size is None:
            raise Exception("unsupported granularity size '{}'".format(
                granularity.name))

        return TrendStore(
            datasource, entitytype, granularity, partition_size, "table"
        ).create(cursor)
    else:
        return trendstore

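# Usage sketch for get_or_create_trendstore() above; not part of the
# original module. Calling it twice with the same arguments should return
# the same trendstore, creating it only on the first call. PARTITION_SIZES
# must contain an entry for the granularity's name, or an exception is
# raised.
def _example_get_or_create(cursor, datasource, entitytype, granularity):
    first_call = get_or_create_trendstore(
        cursor, datasource, entitytype, granularity)
    second_call = get_or_create_trendstore(
        cursor, datasource, entitytype, granularity)

    assert first_call.id == second_call.id
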
def test_update(conn):
    trend_names = ["CellID", "CCR", "Drops"]
    data_rows = [
        (10023, ("10023", "0.9919", "17")),
        (10047, ("10047", "0.9963", "18"))
    ]
    data_types = extract_data_types(data_rows)
    update_data_rows = [(10023, ("10023", "0.5555", "17"))]
    timestamp = datetime.now()
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table"
        ).create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

    store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)
    store(conn, SCHEMA, table.name, trend_names, timestamp,
          update_data_rows)
    conn.commit()

    query = table.select([Column("modified"), Column("CCR")])

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        rows = cursor.fetchall()

    assert_not_equal(rows[0][0], rows[1][0])
    assert_not_equal(rows[0][1], rows[1][1])

def test_store_alter_column(conn, dataset):
    partition_size = 86400

    trendstore = TrendStore(
        dataset.datasource, dataset.entitytype, dataset.granularity,
        partition_size, "table")

    with closing(conn.cursor()) as cursor:
        trendstore.create(cursor)

    conn.commit()

    timestamp = dataset.datasource.tzinfo.localize(
        datetime.datetime(2013, 4, 25, 11, 0))

    trends = ["a", "b", "c"]
    rows = [
        (1234, [1, 2, 3]),
        (2345, [4, 5, 6])
    ]

    datapackage = DataPackage(dataset.granularity, timestamp, trends, rows)

    transaction = trendstore.store(datapackage)
    transaction.run(conn)

    table = trendstore.partition(timestamp).table()

    condition = And(
        Eq(Column("entity_id"), 2345),
        Eq(Column("timestamp"), timestamp))

    query = table.select(Column("c")).where_(condition)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        c, = cursor.fetchone()

    eq_(c, 6)

    trends = ["a", "b", "c"]
    rows = [(2345, [4, 5, "2013-04-25 11:00:00"])]

    datapackage = DataPackage(dataset.granularity, timestamp, trends, rows)

    transaction = trendstore.store(datapackage)
    transaction.run(conn)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        c, = cursor.fetchone()

    eq_(c, datetime.datetime(2013, 4, 25, 11, 0, 0))

def load(self, cursor):
    self.datasource = name_to_datasource(cursor, "testset1")
    self.entitytype = name_to_entitytype(cursor, self.entitytype_name)

    # Materialize the entities as a list; a lazy `map` iterator could only
    # be consumed once.
    self.entities = [dn_to_entity(cursor, dn) for dn in self.dns]

    datapackage = generate_datapackage_a(
        self.granularity, self.timestamp, self.entities)

    self.trendstore = TrendStore.get(
        cursor, self.datasource, self.entitytype, self.granularity)

    if not self.trendstore:
        self.trendstore = TrendStore(
            self.datasource, self.entitytype, self.granularity,
            partition_size=86400, type="table"
        ).create(cursor)

    self.partition = store_datapackage(
        cursor, self.trendstore, datapackage, self.modified)

def load(self, cursor):
    entitytype = name_to_entitytype(cursor, "materialize_dummytype001")
    self.datasource = name_to_datasource(
        cursor, "materialize_src_normal001")
    view_datasource = name_to_datasource(cursor, "vmaterialize_normal001")
    granularity = create_granularity('900')

    self.timestamp = self.datasource.tzinfo.localize(
        datetime.datetime(2013, 8, 26, 22, 0, 0))

    trend_names = ["cntr"]

    rows_small = [
        (1234, (55,)),
        (1235, (56,))
    ]

    self.small_datapackage = DataPackage(
        granularity, self.timestamp, trend_names, rows_small)

    rows_large = [
        (1234, (55243444334,)),
        (1235, (56242343242,))
    ]

    self.large_datapackage = DataPackage(
        granularity, self.timestamp, trend_names, rows_large)

    self.trendstore = TrendStore(
        self.datasource, entitytype, granularity, 86400, 'table')
    self.trendstore.create(cursor)

    partition = self.trendstore.partition(self.timestamp)
    partition.create(cursor)

    self.trendstore.check_columns_exist(trend_names, ["smallint"])(cursor)

    modified = self.datasource.tzinfo.localize(datetime.datetime.now())

    store_copy_from(
        cursor, partition.table(), self.small_datapackage, modified)
    mark_modified(cursor, partition.table(), self.timestamp, modified)

    view_trendstore = TrendStore(
        view_datasource, entitytype, granularity, 0, 'view'
    ).create(cursor)

    sql = (
        "SELECT "
        "entity_id, "
        "timestamp, "
        'cntr FROM {}').format(self.trendstore.base_table().render())

    self.view = View(view_trendstore, sql).define(cursor).create(cursor)

def test_create_view(conn):
    testset_small = TestSet1Small()

    with closing(conn.cursor()) as cursor:
        testset_small.load(cursor)

        datasource = name_to_datasource(cursor, "view-test")

        trendstore = TrendStore.get(
            cursor, datasource, testset_small.entitytype,
            testset_small.granularity)

        if not trendstore:
            trendstore = TrendStore(
                datasource, testset_small.entitytype,
                testset_small.granularity, partition_size=86400,
                type="view"
            ).create(cursor)

        view_sql = (
            "SELECT "
            "999 AS entity_id, "
            "'2013-08-26 13:00:00+02:00'::timestamp with time zone "
            "AS timestamp, "
            '10 AS "CntrA"')

        view = View(trendstore, view_sql).define(cursor).create(cursor)

    conn.commit()

    plugin = get_plugin("trend")

    instance_v4 = plugin(conn, api_version=4)

    start = testset_small.datasource.tzinfo.localize(
        datetime.datetime(2013, 8, 26, 13, 0, 0))
    end = start

    result = instance_v4.retrieve(trendstore, ["CntrA"], None, start, end)

    eq_(len(result), 1)

def get_trendstore(self, datasource, entitytype, granularity):
    with closing(self.conn.cursor()) as cursor:
        return TrendStore.get(cursor, datasource, entitytype, granularity)

def test_run(conn):
    with closing(conn.cursor()) as cursor:
        reset_db(cursor)

    conn.commit()

    minerva_context = MinervaContext(conn, conn)

    source_granularity = create_granularity("900")
    dest_granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(
            cursor, "dummy-transform-src")

        entitytype = get_dummy_entitytype(cursor, dummy_type_name)

        partition_size = 86400

        trendstore_1 = TrendStore(
            source_datasource_1, entitytype, source_granularity,
            partition_size, "table")
        trendstore_1.create(cursor)

        trendstore_2 = TrendStore(
            source_datasource_2, entitytype, source_granularity,
            partition_size, "table")
        trendstore_2.create(cursor)

        result_trendstore = TrendStore(
            dest_datasource, entitytype, dest_granularity, partition_size,
            "table")
        result_trendstore.create(cursor)

        function_mappings = [
            add_function_mapping(
                cursor, None, ["counter_a"], "identity_a"),
            add_function_mapping(
                cursor, None, ["counter_b"], "identity_b"),
            add_function_mapping(
                cursor, None, ["counter_c"], "identity_c"),
            add_function_mapping(
                cursor, "add", ["counter_a", "counter_b"], "add_a_b"),
            add_function_mapping(
                cursor, "multiply", ["counter_a", "300"], "a_times_300")
        ]

        function_mapping_ids = [fm.id for fm in function_mappings]

        function_set_qtr = add_function_set(
            cursor, "test_set", "", function_mapping_ids,
            [source_datasource_1.id, source_datasource_2.id], entitytype.id,
            source_granularity.name, dest_datasource.id, entitytype.id,
            dest_granularity.name, None, [], None, True)

        # Materialize as a list; the entities are used to build both
        # sources below, so a lazy `map` iterator would be exhausted after
        # the first use.
        entities = [get_or_create_entity(cursor, dn) for dn in dns]

    conn.commit()

    source_1 = create_source_1(source_granularity, entities)

    def store_modified_at(trendstore, datapackage, modified):
        def set_modified(state):
            state["modified"] = modified

        partition = trendstore.partition(datapackage.timestamp)

        set_modified_action = UpdateState(set_modified)
        copy_from = CopyFrom(k(partition), k(datapackage))

        return DbTransaction(set_modified_action, copy_from)

    transaction = store_modified_at(trendstore_1, source_1, modified_a)
    transaction.run(conn)

    source_2 = create_source_2(source_granularity, entities)

    transaction = store_modified_at(trendstore_2, source_2, modified_a)
    transaction.run(conn)

    result_partition = result_trendstore.partition(timestamp)
    result_table = result_partition.table()

    conn.commit()

    logging.debug("source_1")
    logging.debug(unlines(render_datapackage(source_1)))

    logging.debug("source_2")
    logging.debug(unlines(render_datapackage(source_2)))

    dest_timestamp = timestamp

    transformation = Transformation(function_set_qtr, dest_timestamp)
    transformation.execute(minerva_context)

    columns = [
        Column(name)
        for name in [
            "entity_id", "identity_a", "identity_b", "add_a_b",
            "a_times_300"]
    ]

    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)

        logging.debug(unlines(render_result(cursor)))

        src_table_1 = trendstore_1.partition(timestamp).table()

        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())

        src_table_2 = trendstore_2.partition(timestamp).table()

        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())

        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(
            Column("processed_max_modified")
        ).where_(Eq(Column("function_set_id")))
        query.execute(cursor, (function_set_qtr.id,))
        processed_max_modified = first(cursor.fetchone())

        eq_(max(src1_max_modified, src2_max_modified),
            processed_max_modified)