def test_recast_1():
    """Check that a long-format table is recast into one column per variable.

    The same wide table is expected whether the key, variable and value
    fields are left unspecified or named explicitly.
    """
    long_form = (
        ('id', 'variable', 'value'),
        (3, 'age', 16),
        (1, 'gender', 'F'),
        (2, 'gender', 'M'),
        (2, 'age', 17),
        (1, 'age', 12),
        (3, 'gender', 'M'),
    )
    wide_form = (
        ('id', 'age', 'gender'),
        (1, 12, 'F'),
        (2, 17, 'M'),
        (3, 16, 'M'),
    )

    # by default lift 'variable' field, hold everything else; the later
    # entries spell out progressively more of those settings explicitly
    keyword_sets = (
        {},
        {'variablefield': 'variable'},
        {'key': 'id', 'variablefield': 'variable'},
        {'key': 'id', 'variablefield': 'variable', 'valuefield': 'value'},
    )
    for keywords in keyword_sets:
        ieq(wide_form, recast(long_form, **keywords))
def test_recast_date():
    """Check that recasting works when the key column holds datetime values.

    All keys derive from a single ``datetime.now()`` reading and differ only
    in their hour, so the expected rows are ordered by hour 1, 2, 3.
    """
    moment = datetime.now()

    def at(hour):
        # a fresh datetime per cell, identical to ``moment`` except the hour
        return moment.replace(hour=hour)

    long_form = (
        ('id', 'variable', 'value'),
        (at(3), 'age', 16),
        (at(1), 'gender', 'F'),
        (at(2), 'gender', 'M'),
        (at(2), 'age', 17),
        (at(1), 'age', 12),
        (at(3), 'gender', 'M'),
    )
    wide_form = (
        ('id', 'age', 'gender'),
        (at(1), 12, 'F'),
        (at(2), 17, 'M'),
        (at(3), 16, 'M'),
    )

    # by default lift 'variable' field, hold everything else; the later
    # entries spell out progressively more of those settings explicitly
    keyword_sets = (
        {},
        {'variablefield': 'variable'},
        {'key': 'id', 'variablefield': 'variable'},
        {'key': 'id', 'variablefield': 'variable', 'valuefield': 'value'},
    )
    for keywords in keyword_sets:
        ieq(wide_form, recast(long_form, **keywords))
def test_recast4():
    """Check that variables absent for a given id come back as ``None``."""
    # deal with missing data: id 2 has no gender, id 3 has no age
    sparse = (
        ('id', 'variable', 'value'),
        (1, 'gender', 'F'),
        (2, 'age', 17),
        (1, 'age', 12),
        (3, 'gender', 'M'),
    )
    filled = (
        ('id', 'age', 'gender'),
        (1, 12, 'F'),
        (2, 17, None),
        (3, None, 'M'),
    )
    ieq(filled, recast(sparse, key='id'))
def test_recast_3():
    """Check recasting when one key maps to several values of a variable.

    Covers the default call (``id`` and ``time`` both held), keying on ``id``
    alone with no reducer, and keying on ``id`` with max, min and rounded-mean
    reducers for ``weight``.
    """
    measurements = (
        ('id', 'time', 'variable', 'value'),
        (1, 11, 'weight', 66.4),
        (1, 14, 'weight', 55.2),
        (2, 12, 'weight', 53.2),
        (2, 16, 'weight', 43.3),
        (3, 12, 'weight', 34.5),
        (3, 17, 'weight', 49.4),
    )

    def rounded_mean(precision):
        # build a reducer returning the arithmetic mean rounded to `precision`
        def reducer(values):
            return round(float(sum(values)) / len(values), precision)
        return reducer

    # default call: every (id, time) pair keeps its own row
    per_time = (
        ('id', 'time', 'weight'),
        (1, 11, 66.4),
        (1, 14, 55.2),
        (2, 12, 53.2),
        (2, 16, 43.3),
        (3, 12, 34.5),
        (3, 17, 49.4),
    )
    ieq(per_time, recast(measurements))

    # in the absence of an aggregation function, list all values
    listed = (
        ('id', 'weight'),
        (1, [66.4, 55.2]),
        (2, [53.2, 43.3]),
        (3, [34.5, 49.4]),
    )
    ieq(listed, recast(measurements, key='id'))

    # max aggregation
    heaviest = (
        ('id', 'weight'),
        (1, 66.4),
        (2, 53.2),
        (3, 49.4),
    )
    ieq(heaviest, recast(measurements, key='id', reducers={'weight': max}))

    # min aggregation
    lightest = (
        ('id', 'weight'),
        (1, 55.2),
        (2, 43.3),
        (3, 34.5),
    )
    ieq(lightest, recast(measurements, key='id', reducers={'weight': min}))

    # mean aggregation
    averaged = (
        ('id', 'weight'),
        (1, 60.80),
        (2, 48.25),
        (3, 41.95),
    )
    ieq(
        averaged,
        recast(
            measurements,
            key='id',
            reducers={'weight': rounded_mean(precision=2)},
        ),
    )
def test_recast_2():
    """Check that only the requested variables are recast into fields."""
    long_form = (
        ('id', 'variable', 'value'),
        (3, 'age', 16),
        (1, 'gender', 'F'),
        (2, 'gender', 'M'),
        (2, 'age', 17),
        (1, 'age', 12),
        (3, 'gender', 'M'),
    )
    gender_only = (
        ('id', 'gender'),
        (1, 'F'),
        (2, 'M'),
        (3, 'M'),
    )

    # can manually pick which variables you want to recast as fields
    # TODO this is awkward
    selection = {'variable': ['gender']}
    ieq(gender_only, recast(long_form, key='id', variablefield=selection))
def test_recast_empty():
    """Check that a header-only table recasts to just the held field."""
    header_only = (('foo', 'variable', 'value'),)
    wanted = (('foo',),)
    ieq(wanted, recast(header_only))