def test_basic(self):
    """Regroup lays the target's records out in the source's groups."""
    source_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    source_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    target_e = [(1, 2, 3), (4,), (5, 6, 7, 8, 9)]
    target_f = [(0, 1, 0), (1,), (0, 1, 0, 1, 0)]

    # Source: columns a/b, groups of 4, 2 and 3 records.
    source = RecordSet(recordType='ab')
    for a_values, b_values in zip(source_a, source_b):
        source.append(pair for pair in zip(a_values, b_values))

    # Target: columns e/f, the same nine rows grouped as 3, 1 and 5.
    target = RecordSet(recordType='ef')
    for e_values, f_values in zip(target_e, target_f):
        target.append(pair for pair in zip(e_values, f_values))

    regroup = Regroup(source, target)

    # The result keeps the target's columns ...
    result_fields = regroup.results._RecordType._fields
    self.assertEqual(result_fields, ('e', 'f'))

    # ... but follows the source's 4/2/3 grouping.
    regrouped = [
        [record._tuple for record in group]
        for group in regroup
    ]
    self.assertEqual(
        regrouped,
        [
            [(1, 0), (2, 1), (3, 0), (4, 1)],
            [(5, 0), (6, 1)],
            [(7, 0), (8, 1), (9, 0)],
        ],
    )
def test_basic(self):
    """Merge combines two record sets; the later source wins column b."""
    first_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    first_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    second_c = [(9, 8, 7, 6, 5), (4, 3, 2), (1,)]
    second_b = [(1, 0, 1, 0, 1), (0, 1, 0), (1,)]

    # First source has columns a/b.
    first = RecordSet(recordType='ab')
    for a_values, b_values in zip(first_a, first_b):
        first.append(pair for pair in zip(a_values, b_values))

    # Second source has columns c/b; its b overlaps the first source's b.
    second = RecordSet(recordType='cb')
    for c_values, b_values in zip(second_c, second_b):
        second.append(pair for pair in zip(c_values, b_values))

    merge = Merge([first, second])

    # Columns keep first-seen order even though b comes from the second source.
    self.assertEqual(merge.results._RecordType._fields, ('a', 'b', 'c'))

    # Spot-check the first merged record: a from first, b and c from second.
    first_record = merge.results._groups[0][0]
    self.assertEqual(first_record._tuple, (1, 1, 9))

    # All nine records land in a single group.
    merged = [
        [record._tuple for record in group]
        for group in merge
    ]
    self.assertEqual(
        merged,
        [[
            (1, 1, 9), (2, 0, 8), (3, 1, 7),
            (4, 0, 6), (5, 1, 5), (6, 0, 4),
            (7, 1, 3), (8, 0, 2), (9, 1, 1),
        ]],
    )
class LagBucket(Transform):
    """Pair each source record with the record `lag` positions before it.

    For every record scanned after the first `lag` records, one result
    record is emitted. Each of its fields holds a ``(lagged, current)``
    tuple built from the matching fields of the two source records.

    Args:
        source: The record set to scan; its record type is reused for the
            result set.
        lag: How many records back the first element of each pair comes
            from. Must be at least 1: with 0 the lag buffer is empty when
            `transform` pops from it.
    """
    __slots__ = ('_lag', '_lagRecords')

    ScanClass = RecordScanner

    def __init__(self, source, lag=1, *args, **kwargs):
        # Initialize mixins
        super(LagBucket, self).__init__(*args, **kwargs)

        self._lag = lag
        self.sources = (source,)
        self._resultset = RecordSet(recordType=source._RecordType)
        self.scanners = (self.ScanClass(source),)
        # Records already scanned but not yet paired with a successor.
        self._lagRecords = []

    def transform(self):
        """Consume the scanner, appending one lagged-pair record per record.

        The lag buffer persists on the instance between calls.
        """
        scanner = self.scanners[0]

        # Prime the buffer with the first `lag` records. If the scanner runs
        # dry here, StopIteration propagates to the caller
        # (NOTE(review): presumably handled by the Transform machinery).
        while len(self._lagRecords) < self._lag:
            self._lagRecords.append(next(scanner))

        for record in scanner:
            prev = self._lagRecords.pop(0)
            # Cast to the record type before appending so the tuple of
            # (last, this) pairs is stored as ONE record rather than being
            # read as a group of two-value records.
            self._resultset.append(
                self._resultset.coerceRecordType(
                    tuple(zip(prev._tuple, record._tuple))
                )
            )
            self._lagRecords.append(record)
class Merge(Transform):
    """Fuse several source recordsets into a single recordset.

    The resulting record type carries every column found in the sources.
    Where sources share a column name, the value comes from the later source.
    """
    ScanClass = ElementScanner

    def __init__(self, sources, *args, **kwargs):
        # Initialize mixins
        super(Merge, self).__init__(*args, **kwargs)

        self.sources = tuple(sources)
        self._resolveSources()

    def _resolveSources(self):
        """Pick one scanner per field, preferring the latest source.

        Sets `self.scanners` and `self._resultset`.
        """
        # Composable sources contribute their results; others are used as-is.
        resolved = []
        for source in self.sources:
            if isinstance(source, Composable):
                resolved.append(source.results)
            else:
                resolved.append(source)

        # Remember where each field name was first seen, earliest source first.
        position = {}
        for source in resolved:
            for field in source._RecordType._fields:
                position.setdefault(field, len(position))

        # Walk the sources backwards so the latest owner of a field claims it.
        unclaimed = set(position)
        claimed = []
        for source in reversed(resolved):
            for field in source._RecordType._fields:
                if field not in unclaimed:
                    continue
                unclaimed.discard(field)
                claimed.append((field, self.ScanClass(source, field)))
                if not unclaimed:
                    break
            if not unclaimed:
                break

        # Later sources win the values, but columns keep first-seen order,
        # starting with the earlier sources.
        claimed.sort(key=lambda entry: position[entry[0]])

        self.scanners = tuple(scanner for _, scanner in claimed)
        self._resultset = RecordSet(
            recordType=genRecordType(field for field, _ in claimed)
        )

    def transform(self):
        """Scan all field scanners in lockstep and append the new records."""
        merged = tuple(
            self._resultset.coerceRecordType(values)
            for values in zip(*self.scanners)
        )
        self._resultset.append(merged)
def test_misalignment_2(self):
    """Regroup when the target holds more records than the source covers."""
    # Source: a single group of four records, columns a/b.
    source_a = [(1, 2, 3, 4)]
    source_b = [(0, 1, 0, 1)]
    source = RecordSet(recordType='ab')
    for a_values, b_values in zip(source_a, source_b):
        source.append(pair for pair in zip(a_values, b_values))

    # Target: longer than the source, ten records in four groups.
    target_e = [(1, 2, 3), (4,), (5, 6, 7), (8, 9, 10)]
    target_f = [(0, 1, 0), (1,), (0, 1, 0), (1, 0, 10)]
    target = RecordSet(recordType='ef')
    for e_values, f_values in zip(target_e, target_f):
        target.append(pair for pair in zip(e_values, f_values))

    regroup = Regroup(source, target)

    # The result carries the target's columns.
    self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

    # With only one source group, only that group gets mapped.
    regrouped = [
        [record._tuple for record in group]
        for group in regroup
    ]
    self.assertEqual(regrouped, [[(1, 0), (2, 1), (3, 0), (4, 1)]])

    # Growing the source by two groups ...
    source.extend([
        ((5, 0), (6, 1)),
        ((7, 0), (8, 1), (9, 0)),
    ])

    # ... lets two more groups be mapped. The tenth target record is still
    # left out because no source group exists for it.
    regrouped = [
        [record._tuple for record in group]
        for group in regroup
    ]
    self.assertEqual(
        regrouped,
        [
            [(1, 0), (2, 1), (3, 0), (4, 1)],
            [(5, 0), (6, 1)],
            [(7, 0), (8, 1), (9, 0)],
        ],
    )
class Pivot(Transform):
    """Turn each group of records into one record of per-column tuples.

    [({a:4,b:3},{a:6,b:5},{a:8,b:7}),({a:10,b:9},{a:12,b:11})]
    becomes
    [({a:(4,6,8),b:(3,5,7)}),({a:(10,12),b:(9,11)})]
    """
    ScanClass = GroupScanner

    def __init__(self, source, *args, **kwargs):
        # Initialize mixins
        super(Pivot, self).__init__(*args, **kwargs)

        self.sources = (source, )
        self._resultset = RecordSet(recordType=source._RecordType)

        groupScanner = self.ScanClass(self.sources[0])
        self.scanners = (groupScanner, )

    def transform(self):
        """Append one pivoted record for every group the scanner yields."""
        for group in self.scanners[0]:
            # Transpose the group: one tuple of values per column.
            columns = tuple(zip(*group))
            # Cast to the record type early so the tuples are not
            # misunderstood when appended.
            pivoted = self._resultset.coerceRecordType(columns)
            self._resultset.append(pivoted)
def test_misalignment_1(self):
    """Regroup when the target holds fewer records than the source covers."""
    # Source: nine records in groups of 4, 2 and 3, columns a/b.
    source_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    source_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    source = RecordSet(recordType='ab')
    for a_values, b_values in zip(source_a, source_b):
        source.append(pair for pair in zip(a_values, b_values))

    # Target: shorter than the source, only seven records.
    target_e = [(1, 2, 3), (4,), (5, 6, 7)]
    target_f = [(0, 1, 0), (1,), (0, 1, 0)]
    target = RecordSet(recordType='ef')
    for e_values, f_values in zip(target_e, target_f):
        target.append(pair for pair in zip(e_values, f_values))

    regroup = Regroup(source, target)

    # The result carries the target's columns.
    self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

    # Seven target records fill the source's first two groups (4 + 2).
    # The third group would be incomplete, so it is omitted.
    regrouped = [
        [record._tuple for record in group]
        for group in regroup
    ]
    self.assertEqual(
        regrouped,
        [
            [(1, 0), (2, 1), (3, 0), (4, 1)],
            [(5, 0), (6, 1)],
        ],
    )

    # Two more target records give the target enough to complete the
    # third source group.
    target.append([(8, 1), (9, 0)])

    regrouped = [
        [record._tuple for record in group]
        for group in regroup
    ]
    self.assertEqual(
        regrouped,
        [
            [(1, 0), (2, 1), (3, 0), (4, 1)],
            [(5, 0), (6, 1)],
            [(7, 0), (8, 1), (9, 0)],
        ],
    )
from ligature.recordset import RecordSet


def genData(columns, rows, start=0):
    """Generate `rows` rows of consecutive integers, `columns` wide.

    Args:
        columns: The column count as an int, or any sized object whose
            length is used as the count.
        rows: Number of rows to produce.
        start: First integer of the first row.

    Returns:
        A generator. With one column it yields bare ints; otherwise it
        yields tuples, e.g. ``genData(2, 3)`` gives (0, 1), (2, 3), (4, 5).
    """
    width = columns if isinstance(columns, int) else len(columns)
    row_starts = range(start, start + rows * width, width)
    if width == 1:
        return (value for value in row_starts)
    return (tuple(range(first, first + width)) for first in row_starts)


# Column values for the shared fixtures, one inner tuple per group.
a1 = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
b1 = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]

a2 = [(11, 12, 13), (14, 15, 16)]
b2 = [(1, 0, 1), (0, 1, 0)]

# Base fixture: columns a/b in groups of 4, 2 and 3 records.
simpleRecordSet = RecordSet(recordType='ab')
for a_values, b_values in zip(a1, b1):
    simpleRecordSet.append(pair for pair in zip(a_values, b_values))

# Extra fixture: two further groups of 3 records each.
simpleAddition = RecordSet(recordType='ab')
for a_values, b_values in zip(a2, b2):
    simpleAddition.append(pair for pair in zip(a_values, b_values))