def test_can_generate_select_by_column(self): expected = 'SELECT "sq0"."year" FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name="Select", query_func="select", query_kwargs="{ \"columns\": [ \"year\" ] }", source_id=1 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_generate_join(self): expected = 'SELECT "sq0".*,"dac1_current".* FROM (SELECT * FROM "public"."crs_current") "sq0" FULL OUTER JOIN "public"."dac1_current" ON "sq0"."donor_code"="dac1_current"."donor_code"' OperationStep.objects.create( operation=self.op, step_id=2, name='Join', query_func='join', query_kwargs='{"table_name":"dac1_current","schema_name":"public", "join_on":{"donor_code":"donor_code"}}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_generate_filter(self): expected = 'SELECT "sq0".* FROM (SELECT * FROM "public"."crs_current") "sq0" WHERE "sq0"."year">=1973 OR "sq0"."short_description" ILIKE \'%sector%\' OR "sq0"."short_description" ILIKE \'%wheat%\'' OperationStep.objects.create( operation=self.op, step_id=2, name="Filter", query_func="filter", query_kwargs='{"filters":[{"field":"year", "value":1973, "func":"ge"},{"field":"short_description", "value":"%sector%|%wheat%", "func":"text_search"}]}', source_id=1 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_perform_multi_transform(self): expected = 'SELECT "sq0".*,0+"sq0"."usd_commitment"+"sq0"."usd_disbursement" "usd_commitment_sum" FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name='Transform', query_func='multi_transform', query_kwargs='{"trans_func_name":"sum", "operational_columns":["usd_commitment","usd_disbursement"]}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_generate_avg_aggregate(self): expected = 'SELECT "sq0"."year",AVG("sq0"."usd_commitment") "usd_commitment_Avg" FROM (SELECT * FROM "public"."crs_current") "sq0" GROUP BY "sq0"."year"' OperationStep.objects.create( operation=self.op, step_id=2, name='Aggregate', query_func='aggregate', query_kwargs='{"group_by":["year"],"agg_func_name":"Avg", "operational_column":"usd_commitment"}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_perform_last_value(self): expected = 'SELECT "sq0".*,LAST_VALUE("sq0"."usd_commitment") OVER(ORDER BY "sq0"."year") FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name='Window last val', query_func='window', query_kwargs='{"window_fn":"LastValue","term":"usd_commitment","order_by":["year"]}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_generate_perform_dense_rank(self): expected = 'SELECT "sq0".*,DENSE_RANK() OVER(ORDER BY "sq0"."usd_commitment") FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name='Window denserank', query_func='window', query_kwargs='{"window_fn":"DenseRank","order_by":["usd_commitment"]}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_perform_scalar_transform(self): expected = 'SELECT "sq0".*,"sq0"."short_description" ILIKE \'%wheat%\' "short_description_text_search" FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name='Transform', query_func='scalar_transform', query_kwargs='{"trans_func_name":"text_search", "operational_column":"short_description", "operational_value":"%wheat%"}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_perform_stddev(self): expected = 'SELECT "sq0".*,STDDEV("sq0"."usd_commitment") OVER(PARTITION BY "sq0"."year") FROM (SELECT * FROM "public"."crs_current") "sq0"' OperationStep.objects.create( operation=self.op, step_id=2, name='Window StdDev', query_func='window', query_kwargs='{"window_fn":"StdDev","term":"usd_commitment","over":["year"]}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def test_can_generate_join_for_specific_columns(self): expected = 'SELECT "sq0".*,"dac1_current"."part_code","dac1_current"."part_name" FROM (SELECT * FROM "public"."crs_current") "sq0" FULL OUTER JOIN "public"."dac1_current" ON "sq0"."donor_code"="dac1_current"."donor_code" AND "sq0"."year"="dac1_current"."year"' OperationStep.objects.create( operation=self.op, step_id=2, name='Join', query_func='join', query_kwargs='{"table_name":"dac1_current","schema_name":"public", "join_on":{"donor_code":"donor_code","year":"year"},\ "columns_x":["donor_name","usd_commitment"],"columns_y":["part_code","part_name"]}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def get_query(self, request): if 'config' in request.data and request.data['config']: return query.get_advanced_config_query(request.data['config']) elif 'operation_steps' in request.data: return QueryBuilder(operation_steps=request.data['operation_steps']).get_sql_without_limit() else: return ''
def test_can_sum_from_joined_column_fails(self): expected = 'SELECT "donor_code",SUM("usd_commitment") "usd_commitment_Sum" FROM "public"."crs_current" GROUP BY "donor_code"' OperationStep.objects.create( operation=self.op, step_id=2, name='Join', query_func='join', query_kwargs='{"table_name":"dac1_current","schema_name":"public", "join_on":{"donor_code":"donor_code"}}', source_id=2 ) OperationStep.objects.create( operation=self.op, step_id=3, name='Aggregate', query_func='aggregate', query_kwargs='{"group_by":["donor_code"],"agg_func_name":"Sum", "operational_column":"usd_commitment"}', source_id=2 ) qb = QueryBuilder(self.op) self.assertNotEqual(qb.get_sql_without_limit(), expected)
def build_query(operation=None, steps=None, limit=None, offset=None, estimate_count=None, frozen_table_id=None): """Build an SQL query""" count_query = QueryBuilder(operation=operation, operation_steps=steps).count_sql(estimate_count) if not count_query and estimate_count: # if count returns 0, remove estimate TODO: figure out why count would contradict actual results count_query = QueryBuilder(operation=operation, operation_steps=steps).count_sql(False) if limit is None: data_query = QueryBuilder(operation=operation, operation_steps=steps).get_sql_without_limit() else: data_query = QueryBuilder(operation=operation, operation_steps=steps).get_sql(limit, offset) if frozen_table_id: # We replace the old schema and table names with the ones of the frozen one try: frozen_data = FrozenData.objects.get(id=frozen_table_id) frozen_data_table = str(Table(frozen_data.frozen_db_table, schema="archives")) source = Source.objects.get(active_mirror_name=frozen_data.parent_db_table) full_table_name = str(Table(source.active_mirror_name, schema=source.schema)) table_name = source.active_mirror_name count_query = count_query.replace(full_table_name, frozen_data_table) data_query = data_query.replace(table_name, frozen_data.frozen_db_table) except FrozenData.DoesNotExist: # We just ignore and use the parent table pass return (count_query, data_query)
def checkAllQueries(self): queries = Operation.objects.all() for query in queries: print(query.id) sql = QueryBuilder(operation=query).get_sql(limit=2) results = analyse_query(sql) if results[0]['result'] == 'success': continue else: self.stdout.write( self.style.ERROR( "Failed for Operation {} - {} with error {}".format( query.id, query.name, results[0]['error']))) input('Press Enter to continue...')
def test_can_sum_from_joined_column_passes(self): expected = 'SELECT "sq1"."part_name",SUM("sq1"."usd_commitment") "usd_commitment_Sum" FROM (SELECT "sq0".*,"dac1_current"."part_code","dac1_current"."part_name" FROM (SELECT * FROM "public"."crs_current") "sq0" FULL OUTER JOIN "public"."dac1_current" ON "sq0"."year"="dac1_current"."year") "sq1" GROUP BY "sq1"."part_name"' OperationStep.objects.create( operation=self.op, step_id=2, name='Join', query_func='join', query_kwargs='{"table_name":"dac1_current","schema_name":"public", "join_on":{"year":"year"}\ ,"columns_x":["donor_name","usd_commitment"],"columns_y":["part_code","part_name"]}', source_id=2 ) OperationStep.objects.create( operation=self.op, step_id=3, name='Aggregate', query_func='aggregate', query_kwargs='{"group_by":["part_name"],"agg_func_name":"Sum", "operational_column":"usd_commitment"}', source_id=2 ) qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def count_operation_rows(id): try: operation = Operation.objects.get(id=id) if operation: if operation.advanced_config and len(operation.advanced_config): count_query = get_advanced_config_count_query(operation.advanced_config, False) else: count_query = QueryBuilder(operation=operation, operation_steps=None).count_sql(False) count = count_rows(count_query) operation.row_count = count operation.count_rows = False operation.save() return { "status": "success" } else: return { "status": "failed" } except(Operation.DoesNotExist): return { "status": "failed", "reason": "Operation ID {} does not exist".format(id)}
def test_can_generate_select_all(self): expected = 'SELECT * FROM "public"."crs_current"' qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql_without_limit(), expected)
def querytime_estimate(operation=None, operation_steps=None): query = QueryBuilder(operation=operation, operation_steps=operation_steps).get_sql_without_limit() return analyse_query(query)
def test_can_generate_select_with_default_limit(self): expected = 'SELECT * FROM "public"."crs_current" LIMIT 10' qb = QueryBuilder(self.op) self.assertEqual(qb.get_sql(), expected)