def test_agg_cols_explode(self):
    """Check column aggregation through ``agg.explode``.

    Every case explodes the same conditional array and runs a different
    aggregation inside the explode: a plain collect, a nested explode, a
    filter and a group_by. The ``result`` row field of the first collected
    row is compared with a literal expectation.
    """
    mt = hl.utils.range_matrix_table(1, 10)

    def exploded_array():
        # [col_idx, col_idx + 1] where col_idx > 7, an empty int32 array elsewhere.
        return hl.cond(mt.col_idx > 7,
                       [mt.col_idx, mt.col_idx + 1],
                       hl.empty_array(hl.tint32))

    def collect_next_then_zero(value):
        # Collect value + 1, then append a literal 0 to the collected array.
        return agg.collect(value + 1).append(0)

    plain = agg.explode(collect_next_then_zero, exploded_array())
    nested = agg.explode(
        lambda elt: agg.explode(collect_next_then_zero, [elt, elt + 1]),
        exploded_array())
    filtered = agg.explode(
        lambda elt: agg.filter(elt > 8, collect_next_then_zero(elt)),
        exploded_array())
    grouped = agg.explode(
        lambda elt: agg.group_by(elt % 3, collect_next_then_zero(elt)),
        exploded_array())

    cases = (
        (plain, [9, 10, 10, 11, 0]),
        (nested, [9, 10, 10, 11, 10, 11, 11, 12, 0]),
        (filtered, [10, 10, 11, 0]),
        (grouped, {0: [10, 10, 0], 1: [11, 0], 2: [9, 0]}),
    )
    for aggregation, expected in cases:
        first_row_value = mt.select_rows(result=aggregation).result.collect()[0]
        self.assertEqual(first_row_value, expected)
def test_agg_cols_group_by(self):
    """Check column aggregation through ``agg.group_by``.

    Covers group_by wrapping a direct aggregation, a filtered aggregation
    and an exploded aggregation; each ``result`` value of the first
    collected row is compared with a literal dictionary.
    """
    mt = hl.utils.range_matrix_table(1, 10)

    def distinct_next_then_zero():
        # Set of col_idx + 1 converted to an array, with a literal 0 appended.
        return hl.array(agg.collect_as_set(mt.col_idx + 1)).append(0)

    by_mod2 = agg.group_by(mt.col_idx % 2, distinct_next_then_zero())
    by_mod3_filtered = agg.group_by(
        mt.col_idx % 3,
        agg.filter(mt.col_idx > 7, distinct_next_then_zero()))
    by_mod3_exploded = agg.group_by(
        mt.col_idx % 3,
        agg.explode(
            lambda elt: agg.collect(elt + 1).append(0),
            hl.cond(mt.col_idx > 7,
                    [mt.col_idx, mt.col_idx + 1],
                    hl.empty_array(hl.tint32))))

    cases = (
        (by_mod2, {0: [1, 3, 5, 7, 9, 0], 1: [2, 4, 6, 8, 10, 0]}),
        (by_mod3_filtered, {0: [10, 0], 1: [0], 2: [9, 0]}),
        (by_mod3_exploded, {0: [10, 11, 0], 1: [0], 2: [9, 10, 0]}),
    )
    for aggregation, expected in cases:
        first_row_value = mt.select_rows(result=aggregation).result.collect()[0]
        self.assertEqual(first_row_value, expected)
def test_aggregate2(self):
    """Run many aggregators over a two-row table grouped by ``status``.

    The first grouped row is converted to a dictionary with
    ``convert_struct_to_dict`` and compared, field by field, with a
    literal expectation.
    """
    row_type = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32)
    first_row = {'status': 0, 'GT': hl.Call([0, 0]), 'qPheno': 3}
    second_row = {'status': 0, 'GT': hl.Call([0, 1]), 'qPheno': 13}
    kt = hl.Table.parallelize([first_row, second_row], row_type)

    def collect_each(elt):
        # Inner aggregation shared by the explode-based fields.
        return agg.collect(elt)

    def sample_struct():
        # Literal struct value used by the x15 and x16 fields.
        return hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple'))

    grouped = kt.group_by(status=kt.status)
    aggregated = grouped.aggregate(
        x1=agg.collect(kt.qPheno * 2),
        x2=agg.explode(collect_each, [kt.qPheno, kt.qPheno + 1]),
        x3=agg.min(kt.qPheno),
        x4=agg.max(kt.qPheno),
        x5=agg.sum(kt.qPheno),
        x6=agg.product(hl.int64(kt.qPheno)),
        x7=agg.count(),
        x8=agg.count_where(kt.qPheno == 3),
        x9=agg.fraction(kt.qPheno == 1),
        x10=agg.stats(hl.float64(kt.qPheno)),
        x11=agg.hardy_weinberg_test(kt.GT),
        x13=agg.inbreeding(kt.GT, 0.1),
        x14=agg.call_stats(kt.GT, ["A", "T"]),
        x15=agg.collect(sample_struct())[0],
        x16=agg.collect(sample_struct().c.banana)[0],
        x17=agg.explode(collect_each, hl.null(hl.tarray(hl.tint32))),
        x18=agg.explode(collect_each, hl.null(hl.tset(hl.tint32))),
        x19=agg.take(kt.GT, 1, ordering=-kt.qPheno),
    )
    result = convert_struct_to_dict(aggregated.take(1)[0])

    # Listed in field order; dictionary comparison does not depend on it.
    expected = {
        'status': 0,
        'x1': [6, 26],
        'x2': [3, 4, 13, 14],
        'x3': 3,
        'x4': 13,
        'x5': 16,
        'x6': 39,
        'x7': 2,
        'x8': 1,
        'x9': 0.0,
        'x10': {'min': 3.0, 'max': 13.0, 'sum': 16.0,
                'stdev': 5.0, 'n': 2, 'mean': 8.0},
        'x11': {'het_freq_hwe': 0.5, 'p_value': 0.5},
        'x13': {'n_called': 2, 'expected_homs': 1.64,
                'f_stat': -1.777777777777777, 'observed_homs': 1},
        'x14': {'AC': [3, 1], 'AF': [0.75, 0.25], 'AN': 4,
                'homozygote_count': [1, 0]},
        'x15': {'a': 5, 'c': {'banana': 'apple'}, 'b': 'foo'},
        'x16': 'apple',
        'x17': [],
        'x18': [],
        'x19': [hl.Call([0, 1])],
    }

    # Lift the diff size limit so a mismatch is reported in full.
    self.maxDiff = None
    self.assertDictEqual(result, expected)
def test_aggregate2(self):
    """Exercise a broad set of aggregators in one grouped aggregation.

    A two-row table is grouped by ``status``; the aggregators are gathered
    in an ordered mapping and passed as keyword arguments. The first
    resulting row, converted with ``convert_struct_to_dict``, must equal
    the literal ``expected`` dictionary.
    """
    schema = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32)
    rows = [
        {'status': 0, 'GT': hl.Call([0, 0]), 'qPheno': 3},
        {'status': 0, 'GT': hl.Call([0, 1]), 'qPheno': 13},
    ]
    kt = hl.Table.parallelize(rows, schema)

    # Insertion order of this mapping is the keyword order seen by aggregate().
    aggregators = {
        'x1': agg.collect(kt.qPheno * 2),
        'x2': agg.explode(lambda elt: agg.collect(elt),
                          [kt.qPheno, kt.qPheno + 1]),
        'x3': agg.min(kt.qPheno),
        'x4': agg.max(kt.qPheno),
        'x5': agg.sum(kt.qPheno),
        'x6': agg.product(hl.int64(kt.qPheno)),
        'x7': agg.count(),
        'x8': agg.count_where(kt.qPheno == 3),
        'x9': agg.fraction(kt.qPheno == 1),
        'x10': agg.stats(hl.float64(kt.qPheno)),
        'x11': agg.hardy_weinberg_test(kt.GT),
        'x13': agg.inbreeding(kt.GT, 0.1),
        'x14': agg.call_stats(kt.GT, ["A", "T"]),
        'x15': agg.collect(
            hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')))[0],
        'x16': agg.collect(
            hl.Struct(a=5, b="foo", c=hl.Struct(banana='apple')).c.banana)[0],
        'x17': agg.explode(lambda elt: agg.collect(elt),
                           hl.null(hl.tarray(hl.tint32))),
        'x18': agg.explode(lambda elt: agg.collect(elt),
                           hl.null(hl.tset(hl.tint32))),
        'x19': agg.take(kt.GT, 1, ordering=-kt.qPheno),
    }
    by_status = kt.group_by(status=kt.status).aggregate(**aggregators)
    top_row = by_status.take(1)[0]
    result = convert_struct_to_dict(top_row)

    stats_expected = {'min': 3.0, 'max': 13.0, 'sum': 16.0,
                      'stdev': 5.0, 'n': 2, 'mean': 8.0}
    hwe_expected = {'het_freq_hwe': 0.5, 'p_value': 0.5}
    inbreeding_expected = {'n_called': 2, 'expected_homs': 1.64,
                           'f_stat': -1.777777777777777, 'observed_homs': 1}
    call_stats_expected = {'AC': [3, 1], 'AF': [0.75, 0.25], 'AN': 4,
                           'homozygote_count': [1, 0]}
    expected = {
        'status': 0,
        'x1': [6, 26],
        'x2': [3, 4, 13, 14],
        'x3': 3,
        'x4': 13,
        'x5': 16,
        'x6': 39,
        'x7': 2,
        'x8': 1,
        'x9': 0.0,
        'x10': stats_expected,
        'x11': hwe_expected,
        'x13': inbreeding_expected,
        'x14': call_stats_expected,
        'x15': {'a': 5, 'c': {'banana': 'apple'}, 'b': 'foo'},
        'x16': 'apple',
        'x17': [],
        'x18': [],
        'x19': [hl.Call([0, 1])],
    }

    # No truncation of the failure diff.
    self.maxDiff = None
    self.assertDictEqual(result, expected)
def test_agg_cols_explode(self):
    """Verify ``agg.explode`` over columns with four inner aggregations.

    The inner aggregations (collect, nested explode, filter, group_by) are
    written as small named functions; each is exploded over the same
    conditional array and its first-row ``result`` is checked against the
    matching entry of ``wanted``.
    """
    mt = hl.utils.range_matrix_table(1, 10)

    def source():
        # Array handed to explode: two elements when col_idx > 7, else empty.
        return hl.cond(mt.col_idx > 7,
                       [mt.col_idx, mt.col_idx + 1],
                       hl.empty_array(hl.tint32))

    def inner_collect(elt):
        return agg.collect(elt + 1).append(0)

    def inner_nested(elt):
        return agg.explode(lambda elt2: agg.collect(elt2 + 1).append(0),
                           [elt, elt + 1])

    def inner_filtered(elt):
        return agg.filter(elt > 8, agg.collect(elt + 1).append(0))

    def inner_grouped(elt):
        return agg.group_by(elt % 3, agg.collect(elt + 1).append(0))

    inner_aggregations = [inner_collect, inner_nested, inner_filtered, inner_grouped]
    wanted = [
        [9, 10, 10, 11, 0],
        [9, 10, 10, 11, 10, 11, 11, 12, 0],
        [10, 10, 11, 0],
        {0: [10, 10, 0], 1: [11, 0], 2: [9, 0]},
    ]

    # Build every aggregation up front, in case order, before any is evaluated.
    aggregations = [agg.explode(inner, source()) for inner in inner_aggregations]
    for aggregation, expected in zip(aggregations, wanted):
        collected = mt.select_rows(result=aggregation).result.collect()
        self.assertEqual(collected[0], expected)
def test_agg_cols_filter(self):
    """Check column aggregation through ``agg.filter``.

    The same predicate (``col_idx > 7``) guards a plain collect, an explode
    and a group_by; each ``result`` value of the first collected row is
    compared with a literal expectation.
    """
    mt = hl.utils.range_matrix_table(1, 10)

    def past_seven():
        # Filter predicate, rebuilt for each aggregation.
        return mt.col_idx > 7

    filtered_collect = agg.filter(
        past_seven(),
        agg.collect(mt.col_idx + 1).append(0))
    filtered_explode = agg.filter(
        past_seven(),
        agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                    [mt.col_idx, mt.col_idx + 1]))
    filtered_group_by = agg.filter(
        past_seven(),
        agg.group_by(mt.col_idx % 3,
                     hl.array(agg.collect_as_set(mt.col_idx + 1)).append(0)))

    cases = (
        (filtered_collect, [9, 10, 0]),
        (filtered_explode, [9, 10, 10, 11, 0]),
        (filtered_group_by, {0: [10, 0], 2: [9, 0]}),
    )
    for aggregation, expected in cases:
        first_row_value = mt.select_rows(result=aggregation).result.collect()[0]
        self.assertEqual(first_row_value, expected)
def test_agg_cols_filter(self):
    """Verify ``agg.filter`` wrapping collect, explode and group_by.

    Aggregations and their expected values are kept in two parallel lists
    and paired with ``zip``; for each pair the first collected ``result``
    value must equal the expectation.
    """
    mt = hl.utils.range_matrix_table(1, 10)

    aggregations = [
        # Plain collect of col_idx + 1, with a trailing 0 appended.
        agg.filter(mt.col_idx > 7,
                   agg.collect(mt.col_idx + 1).append(0)),
        # Explode of [col_idx, col_idx + 1] inside the filter.
        agg.filter(mt.col_idx > 7,
                   agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                               [mt.col_idx, mt.col_idx + 1])),
        # Group by col_idx % 3 inside the filter.
        agg.filter(mt.col_idx > 7,
                   agg.group_by(
                       mt.col_idx % 3,
                       hl.array(agg.collect_as_set(mt.col_idx + 1)).append(0))),
    ]
    wanted = [
        [9, 10, 0],
        [9, 10, 10, 11, 0],
        {0: [10, 0], 2: [9, 0]},
    ]

    for aggregation, expected in zip(aggregations, wanted):
        collected = mt.select_rows(result=aggregation).result.collect()
        self.assertEqual(collected[0], expected)
def test_aggregate1(self):
    """Aggregate a three-row table and check five aggregator results.

    Covers sum, count, collect, a filtered collect and a mean over the
    exploded elements of the array field ``f``.
    """
    schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32,
                        d=hl.tint32, e=hl.tstr, f=hl.tarray(hl.tint32))

    # Row values in schema field order; zipped with the names below.
    field_names = ('a', 'b', 'c', 'd', 'e', 'f')
    row_values = (
        (4, 1, 3, 5, "hello", [1, 2, 3]),
        (0, 5, 13, -1, "cat", []),
        (4, 2, 20, 3, "dog", [5, 6, 7]),
    )
    rows = [dict(zip(field_names, values)) for values in row_values]
    kt = hl.Table.parallelize(rows, schema)

    summary = hl.Struct(
        q1=agg.sum(kt.b),
        q2=agg.count(),
        q3=agg.collect(kt.e),
        q4=agg.filter((kt.d >= 5) | (kt.a == 0), agg.collect(kt.e)),
        q5=agg.explode(lambda elt: agg.mean(elt), kt.f),
    )
    results = kt.aggregate(summary)

    self.assertEqual(results.q1, 8)
    self.assertEqual(results.q2, 3)
    # Collected strings are compared as sets, so their order is not checked.
    self.assertEqual(set(results.q3), {"hello", "cat", "dog"})
    self.assertEqual(set(results.q4), {"hello", "cat"})
    self.assertAlmostEqual(results.q5, 4)
def test_agg_cols_group_by(self):
    """Verify ``agg.group_by`` over columns in three configurations.

    Cases are appended one at a time: a grouped set-collect, the same
    collect behind a filter, and a grouped explode. Every case compares
    the first collected ``result`` value with a literal dictionary.
    """
    mt = hl.utils.range_matrix_table(1, 10)
    cases = []

    # Group by col_idx % 2.
    cases.append((
        agg.group_by(mt.col_idx % 2,
                     hl.array(agg.collect_as_set(mt.col_idx + 1)).append(0)),
        {0: [1, 3, 5, 7, 9, 0], 1: [2, 4, 6, 8, 10, 0]},
    ))

    # Group by col_idx % 3, aggregating only where col_idx > 7.
    cases.append((
        agg.group_by(
            mt.col_idx % 3,
            agg.filter(mt.col_idx > 7,
                       hl.array(agg.collect_as_set(mt.col_idx + 1)).append(0))),
        {0: [10, 0], 1: [0], 2: [9, 0]},
    ))

    # Group by col_idx % 3, exploding a conditional array within each group.
    cases.append((
        agg.group_by(
            mt.col_idx % 3,
            agg.explode(lambda elt: agg.collect(elt + 1).append(0),
                        hl.cond(mt.col_idx > 7,
                                [mt.col_idx, mt.col_idx + 1],
                                hl.empty_array(hl.tint32)))),
        {0: [10, 11, 0], 1: [0], 2: [9, 10, 0]},
    ))

    for aggregation, expected in cases:
        collected = mt.select_rows(result=aggregation).result.collect()
        self.assertEqual(collected[0], expected)
def test_aggregate1(self):
    """Check sum, count, collect, filter and explode on a small table.

    Three rows are parallelized into a table, five aggregations are
    computed in a single ``aggregate`` call, and each result field is
    asserted separately.
    """
    schema = hl.tstruct(
        a=hl.tint32,
        b=hl.tint32,
        c=hl.tint32,
        d=hl.tint32,
        e=hl.tstr,
        f=hl.tarray(hl.tint32),
    )
    rows = [
        dict(a=4, b=1, c=3, d=5, e="hello", f=[1, 2, 3]),
        dict(a=0, b=5, c=13, d=-1, e="cat", f=[]),
        dict(a=4, b=2, c=20, d=3, e="dog", f=[5, 6, 7]),
    ]
    kt = hl.Table.parallelize(rows, schema)

    results = kt.aggregate(hl.Struct(
        q1=agg.sum(kt.b),
        q2=agg.count(),
        q3=agg.collect(kt.e),
        q4=agg.filter((kt.d >= 5) | (kt.a == 0), agg.collect(kt.e)),
        q5=agg.explode(lambda elt: agg.mean(elt), kt.f),
    ))

    total_b = results.q1
    self.assertEqual(total_b, 8)

    row_count = results.q2
    self.assertEqual(row_count, 3)

    # Order of collected strings is not asserted; compare as sets.
    all_words = set(results.q3)
    self.assertEqual(all_words, {"hello", "cat", "dog"})

    kept_words = set(results.q4)
    self.assertEqual(kept_words, {"hello", "cat"})

    mean_of_f_elements = results.q5
    self.assertAlmostEqual(mean_of_f_elements, 4)