def function_set_from_row(cursor, row):
    id, name, description, mapping_signature, source_datasource_ids, \
        source_entitytype_id, source_granularity_str, dest_datasource_id, \
        dest_entitytype_id, dest_granularity_str, filter_sub_query, group_by, \
        relation_type_id, enabled = row

    get_datasource = partial(get_datasource_by_id, cursor)
    get_entitytype = partial(get_entitytype_by_id, cursor)

    source_granularity = create_granularity(str(source_granularity_str))
    dest_granularity = create_granularity(str(dest_granularity_str))

    source_datasources = map(get_datasource, source_datasource_ids)
    source_entitytype = get_entitytype(source_entitytype_id)
    dest_datasource = get_datasource(dest_datasource_id)
    dest_entitytype = get_entitytype(dest_entitytype_id)

    if relation_type_id is None:
        relation_table = None
    else:
        relation_table = get_table_from_type_id(cursor, relation_type_id)

    dest_trendstore = get_trendstore(
        cursor, dest_datasource, dest_entitytype, dest_granularity)

    if not dest_trendstore:
        version = 4
        partition_size = PARTITION_SIZES[str(dest_granularity)]
        dest_trendstore = create_trendstore(
            cursor, dest_datasource, dest_entitytype, dest_granularity,
            partition_size, "table", version)
        logging.info("created trendstore {}".format(dest_trendstore))

    def map_to_trendstore(cursor, datasource, entitytype, granularity):
        # Use the parameters rather than the enclosing source_* names, so
        # the lookup matches the arguments it is actually called with.
        trendstore = get_trendstore(cursor, datasource, entitytype,
                granularity)

        if trendstore is None:
            msg = (
                "missing trendstore for datasource {} {}, entitytype {} {}, "
                "granularity {}").format(
                datasource.id, datasource.name, entitytype.id,
                entitytype.name, granularity.name)
            logging.info(msg)

        return trendstore

    source_trendstores = [
        map_to_trendstore(cursor, datasource, source_entitytype,
                source_granularity)
        for datasource in source_datasources]

    return FunctionSet(
        id, name, description, mapping_signature, source_trendstores,
        dest_trendstore, filter_sub_query, group_by, relation_table, enabled)
def get_partition_by_name(cursor, name):
    column_names = [
        "table_name", "datasource_id", "entitytype_id", "granularity",
        "data_start", "data_end"]
    columns = map(Column, column_names)

    query = schema.partition.select(
        columns, where_=Eq(Column("table_name")))

    args = name,

    query.execute(cursor, args)

    if cursor.rowcount > 0:
        name, datasource_id, entitytype_id, granularity_str, data_start, \
            data_end = cursor.fetchone()

        granularity = create_granularity(str(granularity_str))
        datasource = get_datasource_by_id(cursor, datasource_id)
        entitytype = get_entitytype_by_id(cursor, entitytype_id)

        trendstore = TrendStore(datasource, entitytype, granularity)

        return Partition(name, trendstore, data_start, data_end)
    else:
        return None
def test_get_aggregate_shard(self):
    awacs_query = [{"type": "C", "value": ["dummy_type"]}]
    granularity = create_granularity("900")
    formula = "SUM(Drops)"
    shard_indexes = [15680]

    with closing(self.conn.cursor()) as cursor:
        entitytype_cell = name_to_entitytype(cursor, 'dummy_type')

        sql, args, entity_id_column = compile_sql(awacs_query, None)

        select_statement = "SELECT {} AS id {}".format(entity_id_column, sql)

        entities_query = cursor.mogrify(select_statement, args)

    get_shard = partial(
        get_aggregate_shard, self.conn, entities_query, entitytype_cell.id,
        granularity, formula)

    # Materialize the map result so it can be iterated and then measured
    # with len().
    shards = list(map(get_shard, shard_indexes))

    for shard in shards:
        logging.debug("{} - {}".format(shard[0], shard[-1]))

    eq_(len(shards), len(shard_indexes))
def test_store_copy_from_2(conn):
    trend_names = ['CCR', 'CCRatts', 'Drops']
    data_rows = [(10023, ('0.9919', '2105', '17'))]
    data_types = ['integer', 'smallint', 'smallint']

    curr_timezone = timezone("Europe/Amsterdam")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    modified = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src010")
        entitytype = name_to_entitytype(cursor, "test-type002")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)
        table = partition.table()

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 1)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def parse(stream, file_name):
    line = stream.readline()

    yield TrendEngine.store(
        DefaultPackage(
            create_granularity('3600 seconds'),
            pytz.utc.localize(datetime(2015, 2, 27, 15, 0)),
            ['x'],
            [('Node=001', (int(line),))]
        )
    )
def __init__(self):
    self.timezone = timezone("Europe/Amsterdam")
    self.timestamp = self.timezone.localize(datetime(2012, 12, 6, 14, 15))
    self.modified = self.timezone.localize(
        datetime(2012, 12, 6, 14, 36, 4))
    self.trendstore = None
    self.granularity = create_granularity("900")
    self.entitytype_name = "dummy_type"
    self.dns = [
        "{}=node_{}".format(self.entitytype_name, i)
        for i in range(63020, 63025)]
def get_partition(cursor, datasource_name, entitytype_name, granularity,
        timestamp):
    datasource = get_datasource(cursor, datasource_name)
    entitytype = get_entitytype(cursor, entitytype_name)
    granularity = create_granularity(granularity)

    trendstore = TrendStore(datasource, entitytype, granularity)

    return trendstore.partition(timestamp)
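# Hedged usage sketch for get_partition above; the connection handling and
# the timestamp value are illustrative assumptions, not part of the
# original listing:
#
#     with closing(conn.cursor()) as cursor:
#         partition = get_partition(
#             cursor, "test-src009", "test-type001", "900",
#             pytz.utc.localize(datetime(2013, 1, 2, 10, 45)))
#
# Note that create_granularity accepts both bare second counts ("900",
# "86400") and unit strings ("1 day", "3600 seconds"), as the other
# snippets in this listing show.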
def __init__(self, datasource, granularity, timestamp_is_start=False):
    self.datasource = datasource
    self.granularity = create_granularity(granularity)
    self.timestamp_is_start = timestamp_is_start
    self.conn = None

    if self.timestamp_is_start:
        self.offset = partial(
            offset_timestamp, timedelta(0, self.granularity.seconds))
    else:
        self.offset = identity
def test_execute_gzipped():
    file_path = '/tmp/data.csv.gz'
    value = 42

    with gzip.open(file_path, 'wt') as test_file:
        test_file.write('{}\n'.format(value))

    with closing(connect('')) as conn:
        clear_database(conn)

        with closing(conn.cursor()) as cursor:
            data_source = DataSource.create(
                'pm-system-1', 'data source for integration test'
            )(cursor)

            entity_type = EntityType.create(
                'Node', 'entity type for integration test'
            )(cursor)

            TableTrendStore.create(TableTrendStoreDescriptor(
                data_source, entity_type, create_granularity('3600 seconds'),
                [TrendDescriptor('x', datatype.Integer, '')],
                86400 * 7
            ))(cursor)

        conn.commit()

        job = HarvestJob(
            id_=1001,
            plugins={'test-data': TestPlugin()},
            existence=Existence(conn),
            conn=conn,
            description={
                "data_type": "test-data",
                "on_success": ["do_nothing"],
                "on_failure": ["do_nothing"],
                "parser_config": {"sub-type": "integer"},
                "uri": file_path,
                "data_source": "pm-system-1"
            }
        )

        job.execute()
def retrieve_related_trend(conn, database_srid, region, region_srid,
        datasource, entitytype, attribute_name, granularity_str, timestamp,
        limit=None):
    granularity = create_granularity(granularity_str)

    with closing(conn.cursor()) as cursor:
        trendstore = TrendStore.get(cursor, datasource, entitytype,
                granularity)

        partition = trendstore.partition(timestamp)

        table = partition.table()

        full_base_tbl_name = table.render()

        relation_name = get_relation_name(conn, "Cell", entitytype.name)
        relation_cell_site_name = get_relation_name(conn, "Cell", "Site")

    bbox2d = transform_srid(
        set_srid(make_box_2d(region), region_srid), database_srid)

    query = (
        "SELECT r.source_id, r.target_id, base_table.\"{0}\" "
        "FROM {1} base_table "
        "JOIN relation.\"{2}\" r ON r.target_id = base_table.entity_id "
        "JOIN relation.\"{3}\" site_rel "
        "ON site_rel.source_id = r.source_id "
        "JOIN gis.site site ON site.entity_id = site_rel.target_id "
        "AND site.position && {4} "
        "WHERE base_table.\"timestamp\" = %(timestamp)s").format(
        attribute_name, full_base_tbl_name, relation_name,
        relation_cell_site_name, bbox2d)

    # The region bounds are presumably consumed by %(left)s-style
    # placeholders rendered into the bbox2d expression above.
    args = {
        "left": region["left"],
        "bottom": region["bottom"],
        "right": region["right"],
        "top": region["top"],
        "timestamp": timestamp}

    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(query, args)
        except psycopg2.ProgrammingError:
            conn.rollback()
            rows = []
        else:
            rows = cursor.fetchall()

    result = {}

    for entity_id, related_entity_id, value in rows:
        if entity_id not in result:
            result[entity_id] = {}

        result[entity_id][related_entity_id] = value

    return result
def get_table_names_v4(cursor, datasources, granularity, entitytype, start,
        end):
    """
    A get_table_names like function that supports both v3 and v4
    trendstores.
    """
    if isinstance(granularity, int):
        granularity = create_granularity(granularity)

    trendstores = [
        TrendStore.get(cursor, datasource, entitytype, granularity)
        for datasource in datasources]

    return get_table_names(trendstores, start, end)
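# Hedged usage sketch for get_table_names_v4; the lookups mirror helpers
# used elsewhere in this listing and are assumptions here:
#
#     datasources = [name_to_datasource(cursor, "test-src009")]
#     entitytype = name_to_entitytype(cursor, "test-type001")
#     tables = get_table_names_v4(
#         cursor, datasources, 900, entitytype, start, end)
#
# A bare int granularity (the v3 convention) is normalized with
# create_granularity before the trendstore lookups.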
def test_create_trendstore(self):
    granularity = create_granularity("900")
    partition_size = 3600

    with closing(self.conn.cursor()) as cursor:
        trendstore = TrendStore(
            self.datasource, self.entitytype, granularity, partition_size,
            "table").create(cursor)

    assert isinstance(trendstore, TrendStore)

    assert trendstore.id is not None
def test_constructor():
    granularity = create_granularity("900")
    timestamp = "2013-05-28 12:00:00"
    trend_names = ["counter_a", "counter_b", "counter_c"]
    rows = [
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott1",
         ("34", "10.3", "334303")),
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott2",
         ("42", "8.5", "206441"))]

    raw_datapackage = RawDataPackage(
        granularity, timestamp, trend_names, rows)

    assert_true(raw_datapackage is not None)
def load(self, cursor):
    self.entitytype = name_to_entitytype(cursor, self.entitytype_name)
    # Materialize the entities so they can be reused for each datapackage
    # below.
    self.entities = list(map(partial(dn_to_entity, cursor), self.dns))

    granularity = create_granularity("900")

    # Data a
    self.datasource_a = name_to_datasource(cursor, "test-source-a")
    self.trendstore_a = TrendStore(
        self.datasource_a, self.entitytype, granularity,
        partition_size=86400, type="table").create(cursor)
    datapackage = generate_datapackage_a(
        granularity, self.timestamp_1, self.entities)
    self.partition_a = store_datapackage(
        cursor, self.trendstore_a, datapackage, self.modified)

    # Data b
    self.datasource_b = name_to_datasource(cursor, "test-source-b")
    self.trendstore_b = TrendStore(
        self.datasource_b, self.entitytype, granularity,
        partition_size=86400, type="table").create(cursor)
    datapackage = generate_datapackage_b(
        granularity, self.timestamp_1, self.entities)
    self.partition_b = store_datapackage(
        cursor, self.trendstore_b, datapackage, self.modified)

    # Data c
    self.datasource_c = name_to_datasource(cursor, "test-source-c")
    self.trendstore_c = TrendStore(
        self.datasource_c, self.entitytype, granularity,
        partition_size=86400, type="table").create(cursor)
    datapackage = generate_datapackage_c(
        granularity, self.timestamp_1, self.entities)
    self.partition_c = store_datapackage(
        cursor, self.trendstore_c, datapackage, self.modified)

    # Data d
    self.datasource_d = name_to_datasource(cursor, "test-source-d")
    self.trendstore_d = TrendStore(
        self.datasource_d, self.entitytype, granularity,
        partition_size=86400, type="table").create(cursor)
    datapackage_1 = generate_datapackage_d(
        granularity, self.timestamp_1, self.entities)
    self.partition_d_1 = store_datapackage(
        cursor, self.trendstore_d, datapackage_1, self.modified)

    datapackage_2 = generate_datapackage_d(
        granularity, self.timestamp_2, self.entities)
    self.partition_d_2 = store_datapackage(
        cursor, self.trendstore_d, datapackage_2, self.modified)
def test_get_by_id(self):
    granularity = create_granularity("900")
    partition_size = 3600
    type = "table"

    with closing(self.conn.cursor()) as cursor:
        t = TrendStore(
            self.datasource, self.entitytype, granularity, partition_size,
            type).create(cursor)

        trendstore = TrendStore.get_by_id(cursor, t.id)

    eq_(trendstore.datasource.id, self.datasource.id)
    eq_(trendstore.partition_size, partition_size)
    assert trendstore.id is not None, "trendstore.id is None"
    eq_(trendstore.version, 4)
def view_from_row(cursor, row):
    id, description, datasource_id, entitytype_id, granularity, sql = row

    view = View()
    view.id = id
    view.sql = sql
    view.description = description
    view.datasource = get_datasource_by_id(cursor, datasource_id)
    view.entitytype = get_entitytype_by_id(cursor, entitytype_id)
    view.granularity = create_granularity(granularity)
    view.sources = get_sources_for_view(cursor, id)

    return view
def test_index_to_interval():
    partition_size = 3600

    partitioning = Partitioning(partition_size)

    # 0 = '1970-01-01T00:00:00+00:00'
    # (0, 0) = divmod(0, 3600)
    start, end = partitioning.index_to_interval(0)

    expected_start = pytz.utc.localize(datetime(1970, 1, 1, 0, 0))
    expected_end = pytz.utc.localize(datetime(1970, 1, 1, 1, 0))

    eq_(start, expected_start)
    eq_(end, expected_end)

    # 1365022800 = '2013-04-03T21:00:00+00:00'
    # (379173, 0) = divmod(1365022800, 3600)
    start, end = partitioning.index_to_interval(379173)

    expected_start = pytz.utc.localize(datetime(2013, 4, 3, 21, 0))
    expected_end = pytz.utc.localize(datetime(2013, 4, 3, 22, 0))

    eq_(start, expected_start)
    eq_(end, expected_end)

    partition_size = 4 * 86400

    partitioning = Partitioning(partition_size)

    start, end = partitioning.index_to_interval(0)

    expected_start = pytz.utc.localize(datetime(1970, 1, 1, 0, 0))
    expected_end = pytz.utc.localize(datetime(1970, 1, 5, 0, 0))

    eq_(start, expected_start)
    eq_(end, expected_end)

    start, end = partitioning.index_to_interval(3963)

    expected_start = pytz.utc.localize(datetime(2013, 5, 27, 0, 0))
    expected_end = pytz.utc.localize(datetime(2013, 5, 31, 0, 0))

    eq_(start, expected_start)
    eq_(end, expected_end)

    granularity = create_granularity("86400")

    # Test if all timestamps in between match.
    for t in granularity.range(expected_start, expected_end):
        print(t)
        ok_(expected_start <= t)
        ok_(t <= expected_end)
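# Worked example of the index arithmetic exercised above: an index i with
# partition_size p covers the interval [i * p, (i + 1) * p) in Unix time.
# For i = 3963 and p = 4 * 86400 = 345600:
#
#     3963 * 345600 = 1369612800  # '2013-05-27T00:00:00+00:00'
#     3964 * 345600 = 1369958400  # '2013-05-31T00:00:00+00:00'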
def test_update_modified_column(conn):
    curr_timezone = timezone("Europe/Amsterdam")

    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    data_types = extract_data_types(data_rows)

    update_data_rows = [(10023, ('10023', '0.9919', '17'))]

    timestamp = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)

        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)
        time.sleep(1)
        store(
            conn, SCHEMA, table.name, trend_names, timestamp,
            update_data_rows)
        conn.commit()

        query = table.select([Column("modified")])
        query.execute(cursor)

        modified_list = [modified for modified in cursor.fetchall()]

        assert_not_equal(modified_list[0], modified_list[1])

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), table.name)).execute(cursor)

        end = first(cursor.fetchone())

        eq_(end, max_modified)
def test_update_and_modify_columns_fractured(conn):
    curr_timezone = timezone("Europe/Amsterdam")
    granularity = create_granularity("900")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    entity_ids = range(1023, 1023 + 100)

    trend_names_a = ["CellID", "CCR", "Drops"]
    data_rows_a = [(i, ("10023", "0.9919", "17")) for i in entity_ids]
    data_types_a = extract_data_types(data_rows_a)

    trend_names_b = ["CellID", "Drops"]
    data_rows_b = [(i, ("10023", "19")) for i in entity_ids]
    data_types_b = extract_data_types(data_rows_b)

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names_a, data_types_a)(cursor)
        conn.commit()

    store(conn, SCHEMA, table.name, trend_names_a, timestamp, data_rows_a)
    time.sleep(0.2)

    # Materialize the column list; a bare map iterator would be exhausted
    # after the first select below.
    check_columns = list(map(Column, ["modified", "Drops"]))

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_before = cursor.fetchone()

    store(conn, SCHEMA, table.name, trend_names_b, timestamp, data_rows_b)

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_after = cursor.fetchone()

    assert_not_equal(row_before[0], row_after[0])
    assert_not_equal(row_before[1], row_after[1])
def test_store_copy_from_1(conn):
    trend_names = ['CellID', 'CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '2105', '17')),
        (10047, ('10047', '0.9963', '4906', '18')),
        (10048, ('10048', '0.9935', '2448', '16')),
        (10049, ('10049', '0.9939', '5271', '32')),
        (10050, ('10050', '0.9940', '3693', '22')),
        (10051, ('10051', '0.9944', '3753', '21')),
        (10052, ('10052', '0.9889', '2168', '24')),
        (10053, ('10053', '0.9920', '2372', '19')),
        (10085, ('10085', '0.9987', '2282', '3')),
        (10086, ('10086', '0.9972', '1763', '5')),
        (10087, ('10087', '0.9931', '1453', '10'))
    ]

    curr_timezone = timezone("Europe/Amsterdam")
    data_types = extract_data_types(data_rows)
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    granularity = create_granularity("900")
    modified = curr_timezone.localize(datetime.now())

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 11)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def load_packages(self, stream, name):
    csvreader = csv.reader(
        stream, delimiter=self.delimiter, quotechar=self.quotechar)

    rows_by_timestamp = {}
    header = None
    self.active = not self.startstring

    for measurement in csvreader:
        if not self.active:
            # We are not yet in the part of the csv we have to look at.
            self.active = self.startstring in measurement
        elif not header:
            # First line, containing the header information.
            header = measurement
        else:
            # Actual data row.
            rowname = None
            timestamp = None
            trend_row = []

            for datatype, value in zip(header, measurement):
                if datatype == self.datevar:
                    timestamp = datetime.datetime.strptime(
                        value, self.timeformat)
                elif datatype == self.idvar:
                    rowname = value
                elif datatype in self.datavars:
                    trend_row.append(value)

            if not (rowname and timestamp):
                # Insufficient data to create a data row.
                continue

            rows = rows_by_timestamp.get(timestamp)

            if not rows:
                rows = []
                rows_by_timestamp[timestamp] = rows

            # No trailing comma here: that would turn the identifier into
            # a one-element tuple.
            row_ident = '%s=%s' % (self.idname, rowname)

            rows.append((row_ident, trend_row))

    trend_names = [
        self.datavars[name]
        for name in header if name in self.datavars]

    for timestamp, rows in rows_by_timestamp.items():
        yield DefaultPackage(
            create_granularity('1 day'), timestamp, trend_names, rows)
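# Shape sketch of the grouping built by load_packages above (the values
# are invented for illustration): each timestamp maps to a list of
# (identifier, trend_row) pairs, and each entry of the dict becomes one
# DefaultPackage.
#
#     rows_by_timestamp = {
#         datetime.datetime(2013, 1, 2, 10, 45): [
#             ('Cell=10023', ['0.9919', '17']),
#             ('Cell=10047', ['0.9963', '18']),
#         ],
#     }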
def test_create_trendstore_with_children(self):
    granularity = create_granularity("900")
    partition_size = 3600

    with closing(self.conn.cursor()) as cursor:
        trendstore = TrendStore(
            self.datasource, self.entitytype, granularity, partition_size,
            "table").create(cursor)

        assert trendstore.id is not None

        timestamp = self.datasource.tzinfo.localize(
            datetime.datetime(2013, 5, 6, 14, 45))

        partition = trendstore.partition(timestamp)

        partition.create(cursor)
def __init__(
        self, data_source, granularity, timestamp_is_start,
        auto_create_trend_store, auto_create_trends):
    self.data_source_name = data_source
    self.granularity = create_granularity(granularity)
    self.timestamp_is_start = timestamp_is_start

    if self.timestamp_is_start:
        # timedelta expects a number of seconds, not a Granularity object
        # (compare the equivalent constructor earlier in this listing).
        self.offset = partial(
            offset_timestamp, timedelta(0, self.granularity.seconds)
        )
    else:
        self.offset = identity

    self.auto_create_trend_store = auto_create_trend_store
    self.auto_create_trends = auto_create_trends
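# Sketch of the timestamp_is_start convention handled above, assuming
# offset_timestamp simply adds the given timedelta: a measurement labeled
# with the start of its interval is shifted forward by one granularity to
# the end-of-interval timestamp.
#
#     offset = partial(offset_timestamp, timedelta(0, 3600))
#     offset(datetime(2015, 2, 27, 15, 0))
#     # -> datetime(2015, 2, 27, 16, 0)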
def packages(self, stream, name):
    reading_mappings = [
        ('AmbHum', lambda readings: readings.get('AmbHum')),
        ('PM1', lambda readings: readings.get('PM1')),
        ('UFP', lambda readings: readings.get('UFP')),
        ('PM25', lambda readings: readings.get('PM25')),
        ('Ozon', lambda readings: readings.get('Ozon')),
        ('PM10', lambda readings: readings.get('PM10')),
        ('Temp', lambda readings: readings.get('Temp')),
        ('RelHum', lambda readings: readings.get('RelHum')),
        ('AmbTemp', lambda readings: readings.get('AmbTemp')),
        ('NO2', lambda readings: readings.get('NO2')),
        ('GPS.lat', lambda readings: readings.get('GPS')['lat']),
        ('GPS.lon', lambda readings: readings.get('GPS')['lon'])
    ]

    data = json.load(stream)

    rows = []

    for measurement in data:
        timestamp_int = (
            measurement['last_measurement']['calibrated']['when']['$date'])

        # The '$date' value is in milliseconds since the epoch.
        timestamp = datetime.datetime.fromtimestamp(
            timestamp_int / 1000.0, None
        )

        readings = measurement['last_measurement']['calibrated']['readings']

        reading_values = [
            str(measurement['_id']),
            timestamp,
            [mapping(readings) for meas_name, mapping in reading_mappings]
        ]

        rows.append(reading_values)

    trends = [
        Trend(0, meas_name, float, 0, meas_name)
        for meas_name, mapping in reading_mappings]

    yield DataPackage(
        AireasPackageType, create_granularity('1 day'), trends, rows
    )
def test_get_trendstore(conn):
    plugin = get_plugin("trend")

    instance = plugin(conn, api_version=4)

    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src")
        entitytype = name_to_entitytype(cursor, "test-type")

        instance.TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)

    trendstore = instance.get_trendstore(datasource, entitytype, granularity)

    assert_not_equal(trendstore, None)
def test_check_column_types(self):
    granularity = create_granularity("900")
    partition_size = 3600

    trendstore = TrendStore(
        self.datasource, self.entitytype, granularity, partition_size,
        "table")

    with closing(self.conn.cursor()) as cursor:
        trendstore.create(cursor)

        column_names = ["counter1", "counter2"]
        initial_data_types = ["smallint", "smallint"]
        data_types = ["integer", "text"]

        check_columns_exist = trendstore.check_columns_exist(
            column_names, initial_data_types)
        check_columns_exist(cursor)

        check_column_types = trendstore.check_column_types(
            column_names, data_types)
        check_column_types(cursor)
def load(self, cursor):
    entitytype = name_to_entitytype(cursor, "materialize_dummytype001")
    self.datasource = name_to_datasource(
        cursor, "materialize_src_normal001")
    view_datasource = name_to_datasource(cursor, "vmaterialize_normal001")
    granularity = create_granularity('900')

    self.timestamp = self.datasource.tzinfo.localize(
        datetime.datetime(2013, 8, 26, 22, 0, 0))
    trend_names = ["cntr"]

    rows_small = [
        (1234, (55,)),
        (1235, (56,))]

    self.small_datapackage = DataPackage(
        granularity, self.timestamp, trend_names, rows_small)

    rows_large = [
        (1234, (55243444334,)),
        (1235, (56242343242,))]

    self.large_datapackage = DataPackage(
        granularity, self.timestamp, trend_names, rows_large)

    self.trendstore = TrendStore(
        self.datasource, entitytype, granularity, 86400, 'table')
    self.trendstore.create(cursor)

    partition = self.trendstore.partition(self.timestamp)
    partition.create(cursor)

    self.trendstore.check_columns_exist(trend_names, ["smallint"])(cursor)

    modified = self.datasource.tzinfo.localize(datetime.datetime.now())

    store_copy_from(
        cursor, partition.table(), self.small_datapackage, modified)
    mark_modified(cursor, partition.table(), self.timestamp, modified)

    view_trendstore = TrendStore(
        view_datasource, entitytype, granularity, 0, 'view').create(cursor)

    sql = (
        "SELECT "
        "entity_id, "
        "timestamp, "
        "cntr "
        "FROM {}").format(self.trendstore.base_table().render())

    self.view = View(view_trendstore, sql).define(cursor).create(cursor)
def test_retrieve_aggregate(self):
    granularity = create_granularity("900")

    with closing(self.conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test")
        entitytype = name_to_entitytype(cursor, "Cell")

        TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(
            cursor)

    column_expressions = ["COUNT(entity_id)"]

    start = datasource.tzinfo.localize(datetime(2012, 12, 6, 14, 15))
    end = datasource.tzinfo.localize(datetime(2012, 12, 6, 14, 15))

    interval = start, end

    retrieve_aggregated(
        self.conn, datasource, granularity, entitytype,
        column_expressions, interval, group_by="entity_id")
def get_by_id(cls, cursor, id):
    args = (id,)

    cls.get_by_id_query.execute(cursor, args)

    if cursor.rowcount == 1:
        (trendstore_id, datasource_id, entitytype_id, granularity_str,
         partition_size, type, version) = cursor.fetchone()

        datasource = get_datasource_by_id(cursor, datasource_id)
        entitytype = get_entitytype_by_id(cursor, entitytype_id)

        granularity = create_granularity(granularity_str)

        trendstore = TrendStore(
            datasource, entitytype, granularity, partition_size, type)

        trendstore.id = trendstore_id

        return trendstore
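# Hedged usage sketch for get_by_id; the id value is hypothetical:
#
#     trendstore = TrendStore.get_by_id(cursor, 42)
#     if trendstore is None:
#         # get_by_id falls through (implicitly returning None) whenever
#         # cursor.rowcount != 1.
#         raise Exception("no such trendstore")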
def test_merge_packages():
    granularity = create_granularity("900")
    timestamp = "2013-05-28 12:00:00"

    trend_names = ["counter_a", "counter_b", "counter_c"]
    rows = [
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott1",
         ("34", "10.3", "334303")),
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott2",
         ("42", "8.5", "206441"))]

    raw_datapackage_1 = RawDataPackage(
        granularity, timestamp, trend_names, rows)

    trend_names = ["counter_d", "counter_e"]
    rows = [
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott1",
         ("2", "0.003")),
        ("Network=SouthPole,Rnc=SP1,Rbs=AdmundsenScott2",
         ("0", "0.090"))]

    raw_datapackage_2 = RawDataPackage(
        granularity, timestamp, trend_names, rows)

    packages = [raw_datapackage_1, raw_datapackage_2]

    merged_packages = RawDataPackage.merge_packages(packages)

    eq_(len(merged_packages), 1)
def test_update(conn):
    trend_names = ["CellID", "CCR", "Drops"]
    data_rows = [
        (10023, ("10023", "0.9919", "17")),
        (10047, ("10047", "0.9963", "18"))
    ]
    data_types = extract_data_types(data_rows)
    update_data_rows = [(10023, ("10023", "0.5555", "17"))]
    timestamp = datetime.now()
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

    store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)

    store(conn, SCHEMA, table.name, trend_names, timestamp, update_data_rows)

    conn.commit()

    query = table.select([Column("modified"), Column("CCR")])

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        rows = cursor.fetchall()

    assert_not_equal(rows[0][0], rows[1][0])
    assert_not_equal(rows[0][1], rows[1][1])
def load_packages(self, stream, name):
    csvreader = csv.reader(
        stream, delimiter=self.delimiter, quotechar=self.quotechar)

    rows_by_timestamp = {}
    header = None
    self.active = not self.startstring

    for measurement in csvreader:
        if not self.active:
            # We are not yet in the part of the csv we have to look at.
            self.active = self.startstring in measurement
        elif not header:
            # First line, containing the header information.
            header = measurement
        else:
            # Actual data row.
            rowname = None
            timestamp = None
            trend_row = []

            for datatype, value in zip(header, measurement):
                value = self.changeddata(datatype, value)

                if value == '' and datatype not in self.allowempty:
                    value = None

                if value == '':
                    value = None

                if datatype == self.datevar:
                    for fmt in self.timeformats:
                        try:
                            timestamp = datetime.datetime.strptime(
                                value, fmt) + self.timeshift
                            break
                        except ValueError:
                            continue
                    else:
                        raise ValueError("No applicable timeformat")
                elif datatype == self.idvar:
                    rowname = value
                elif datatype in self.datavars:
                    trend_row.append(value)

            if not (rowname and timestamp):
                # Insufficient data to create a data row.
                continue

            rows = rows_by_timestamp.get(timestamp)

            if not rows:
                rows = []
                rows_by_timestamp[timestamp] = rows

            row_ident = '{}={}'.format(self.idname, rowname)

            rows.append((row_ident, trend_row))

    trend_names = [
        self.datavars[name]
        for name in header if name in self.datavars]

    package_type = DataPackageType(DnRef(), entity_type_name_from_dn)

    for timestamp, rows in rows_by_timestamp.items():
        yield DataPackage(
            package_type, create_granularity('1 day'), timestamp,
            trend_names, rows)
def __init__(self):
    self.granularity = create_granularity("900")