示例#1
0
	def test_basic(self):
		"""Regroup reports the target's fields, arranged in the source's group sizes."""

		# Column-wise values: the source is split 4/2/3, the target 3/1/5.
		srcA = [(1,2,3,4),(5,6),(7,8,9)]
		srcB = [(0,1,0,1),(0,1),(0,1,0)]
		tgtE = [(1,2,3),(4,),(5,6,7,8,9)]
		tgtF = [(0,1,0),(1,),(0,1,0,1,0)]

		# source: zip(*columns) turns each pair of column tuples into rows
		source = RecordSet(recordType='ab')
		for columns in zip(srcA, srcB):
			source.append(row for row in zip(*columns))

		# target: built the same way, with fields 'e' and 'f'
		target = RecordSet(recordType='ef')
		for columns in zip(tgtE, tgtF):
			target.append(row for row in zip(*columns))

		regroup = Regroup(source, target)

		# The field names must be the target's
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		# The group sizes (4, 2, 3) must be the source's
		expectedGroups = [
			[(1, 0), (2, 1), (3, 0), (4, 1)],
			[(5, 0), (6, 1)],
			[(7, 0), (8, 1), (9, 0)],
			]
		actualGroups = [[record._tuple for record in group] for group in regroup]
		self.assertEqual(actualGroups, expectedGroups)
示例#2
0
    def test_basic(self):
        """Merge joins the fields of both recordsets; the shared 'b' comes from the later one."""

        # Column-wise values for the first ('a', 'b') and second ('c', 'b') recordsets
        firstA = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
        firstB = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
        secondC = [(9, 8, 7, 6, 5), (4, 3, 2), (1, )]
        secondB = [(1, 0, 1, 0, 1), (0, 1, 0), (1, )]

        # zip(*columns) turns each pair of column tuples into rows
        first = RecordSet(recordType='ab')
        for columns in zip(firstA, firstB):
            first.append(row for row in zip(*columns))

        second = RecordSet(recordType='cb')
        for columns in zip(secondC, secondB):
            second.append(row for row in zip(*columns))

        merge = Merge([first, second])

        # 'b' appears once, in the position it had in the first recordset
        self.assertEqual(merge.results._RecordType._fields, ('a', 'b', 'c'))

        # The first merged record takes b=1 from the second recordset
        firstRecord = merge.results._groups[0][0]
        self.assertEqual(firstRecord._tuple, (1, 1, 9))

        # All nine records come out in one group
        expectedGroups = [[
            (1, 1, 9), (2, 0, 8), (3, 1, 7),
            (4, 0, 6), (5, 1, 5), (6, 0, 4),
            (7, 1, 3), (8, 0, 2), (9, 1, 1),
        ]]
        actualGroups = [[record._tuple for record in group] for group in merge]
        self.assertEqual(actualGroups, expectedGroups)
示例#3
0
class LagBucket(Transform):
    """Pair each scanned record with the record seen `lag` records earlier.

    Every appended result is a tuple of ``(earlier, current)`` value pairs,
    built position by position from the two records' ``_tuple`` values.
    """
    __slots__ = ('_lag', '_lagRecords')
    ScanClass = RecordScanner

    def __init__(self, source, lag=1, *args, **kwargs):
        """Create the scanner over `source` and start with an empty lag buffer.

        Any extra positional or keyword arguments are handed to the parent
        initializer.
        """
        super(LagBucket, self).__init__(*args, **kwargs)

        self._lag = lag
        self.sources = (source,)
        self._resultset = RecordSet(recordType=source._RecordType)
        self.scanners = (self.ScanClass(source),)
        # Records waiting to be paired with a later one, oldest first
        self._lagRecords = []

    def transform(self):
        """Fill the lag buffer, then append one pairing per further record."""
        scanner = self.scanners[0]
        buffered = self._lagRecords

        # Hold back the first `lag` records; they have nothing to pair with yet.
        # NOTE(review): next() raises StopIteration if the scanner runs dry
        #   before the buffer is full - confirm the caller expects that.
        while len(buffered) < self._lag:
            buffered.append(next(scanner))

        for current in scanner:
            earlier = buffered.pop(0)
            # NOTE(review): the tuple of pairs is appended as-is, with no
            #   explicit record-type coercion - confirm append() reads it
            #   the intended way.
            self._resultset.append(tuple(zip(earlier._tuple, current._tuple)))
            buffered.append(current)
示例#4
0
class Merge(Transform):
    """Fold several source recordsets into a single recordset.

    The resulting record type carries every column found in the sources.
    When a column name occurs in more than one source, the value is read
      from the latest source that has it.
    """
    ScanClass = ElementScanner

    def __init__(self, sources, *args, **kwargs):
        """Store the sources and work out which source feeds each field.

        Any extra positional or keyword arguments are handed to the parent
        initializer.
        """
        super(Merge, self).__init__(*args, **kwargs)
        self.sources = tuple(sources)
        self._resolveSources()

    def _resolveSources(self):
        """Choose one scanner per field name and build the result recordset.

        A field that several sources share is scanned from the last of them.
        """
        # Composable sources contribute their results; anything else is
        #   used directly.
        resolved = []
        for source in self.sources:
            if isinstance(source, Composable):
                resolved.append(source.results)
            else:
                resolved.append(source)

        # Every field name in source order, duplicates included
        fieldOrder = [
            field
            for source in resolved
            for field in source._RecordType._fields
        ]

        # Walk the sources last to first, so that a later source claims a
        #   shared field before an earlier one can.
        unclaimed = set(fieldOrder)
        claimed = []
        for source in reversed(resolved):
            for field in source._RecordType._fields:
                if field in unclaimed:
                    unclaimed.remove(field)
                    claimed.append((field, self.ScanClass(source, field)))
                if not unclaimed:
                    break
            if not unclaimed:
                break

        # Fields keep the position of their first appearance, so columns
        #   from earlier sources still come first.
        firstPosition = {}
        for position, field in enumerate(fieldOrder):
            firstPosition.setdefault(field, position)
        claimed.sort(key=lambda pair: firstPosition[pair[0]])

        self.scanners = tuple(scanner for _, scanner in claimed)
        self._resultset = RecordSet(recordType=genRecordType(
            field for field, _ in claimed))

    def transform(self):
        """Zip the field scanners together and append the coerced records."""
        coerce = self._resultset.coerceRecordType
        merged = tuple(coerce(values) for values in zip(*self.scanners))
        self._resultset.append(merged)
示例#5
0
	def test_misalignment_2(self):
		"""Regroup when the target holds more records than the source has groups for."""

		# source: a single group of four records
		srcA = [(1,2,3,4)]
		srcB = [(0,1,0,1)]
		source = RecordSet(recordType='ab')
		for columns in zip(srcA, srcB):
			source.append(row for row in zip(*columns))

		# target - longer: ten records, split 3/1/3/3
		tgtE = [(1,2,3),(4,),(5,6,7),(8,9,10)]
		tgtF = [(0,1,0),(1,),(0,1,0),(1,0,10)]
		target = RecordSet(recordType='ef')
		for columns in zip(tgtE, tgtF):
			target.append(row for row in zip(*columns))

		regroup = Regroup(source, target)

		# The field names must be the target's
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		firstGroup = [(1, 0), (2, 1), (3, 0), (4, 1)]

		# The source has just one group, so only that group is produced
		self.assertEqual(
			[[record._tuple for record in group] for group in regroup],
			[firstGroup]
			)

		# Extending the source by two groups...
		source.extend([
			((5,0),(6,1)),
			((7,0),(8,1),(9,0)),
			])

		# ... lets two more groups through.
		# The tenth target record stays out, because no source group
		#   is left for it to land in
		self.assertEqual(
			[[record._tuple for record in group] for group in regroup],
			[firstGroup,
			 [(5, 0), (6, 1)],
			 [(7, 0), (8, 1), (9, 0)]]
			)
示例#6
0
class Pivot(Transform):
    """Turn every group of records into one record of per-field tuples.

    A source shaped like
      [({a:4,b:3},{a:6,b:5},{a:8,b:7}),({a:10,b:9},{a:12,b:11})]
    is rotated into
      [({a:(4,6,8),b:(3,5,7)}),({a:(10,12),b:(9,11)})]
    """
    ScanClass = GroupScanner

    def __init__(self, source, *args, **kwargs):
        """Wire up a group scanner over `source`; the result reuses its record type.

        Any extra positional or keyword arguments are handed to the parent
        initializer.
        """
        super(Pivot, self).__init__(*args, **kwargs)

        self.sources = (source, )
        self._resultset = RecordSet(recordType=source._RecordType)
        self.scanners = (self.ScanClass(self.sources[0]), )

    def transform(self):
        """Append one coerced, column-wise record per scanned group."""
        coerce = self._resultset.coerceRecordType
        for group in self.scanners[0]:
            # zip(*group) regroups the values by field position
            columns = tuple(zip(*group))
            # Coerce before appending so the tuple of tuples is taken as a
            #   single record
            self._resultset.append(coerce(columns))
示例#7
0
	def test_misalignment_1(self):
		"""Regroup when the target runs out before the source's last group is full."""

		# source: nine records, split 4/2/3
		srcA = [(1,2,3,4),(5,6),(7,8,9)]
		srcB = [(0,1,0,1),(0,1),(0,1,0)]
		source = RecordSet(recordType='ab')
		for columns in zip(srcA, srcB):
			source.append(row for row in zip(*columns))

		# target - shorter: seven records, split 3/1/3
		tgtE = [(1,2,3),(4,),(5,6,7)]
		tgtF = [(0,1,0),(1,),(0,1,0)]
		target = RecordSet(recordType='ef')
		for columns in zip(tgtE, tgtF):
			target.append(row for row in zip(*columns))

		regroup = Regroup(source, target)

		# The field names must be the target's
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		firstGroup = [(1, 0), (2, 1), (3, 0), (4, 1)]
		secondGroup = [(5, 0), (6, 1)]

		# Only seven target records exist, so the third source group
		#   (three records) cannot be completed and is left out
		self.assertEqual(
			[[record._tuple for record in group] for group in regroup],
			[firstGroup, secondGroup]
			)

		# Two more target records are enough to complete the last source group
		target.append([(8,1),(9,0)])

		# All nine target records are now placed across the three source groups
		self.assertEqual(
			[[record._tuple for record in group] for group in regroup],
			[firstGroup,
			 secondGroup,
			 [(7, 0), (8, 1), (9, 0)]]
			)
示例#8
0
from ligature.recordset import RecordSet


def genData(columns, rows, start=0):
    """Return a generator of `rows` rows of consecutive integers.

    `columns` is either the row width as an int or a sized object whose
    length is used as the width. With a width of 1 the generator yields
    bare integers; otherwise it yields tuples of `columns` consecutive
    integers. Counting begins at `start`.
    """
    width = columns if isinstance(columns, int) else len(columns)

    # First value of every row; built here so an invalid range fails at once
    rowStarts = range(start, start + rows * width, width)

    if width == 1:
        return (value for value in rowStarts)
    return (tuple(range(first, first + width)) for first in rowStarts)


# Column-wise fixture values: a1 and b1 hold the 'a' and 'b' values for
# three groups of sizes 4, 2 and 3.
a1 = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
b1 = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]

# A second set of values: two groups of three each.
a2 = [(11, 12, 13), (14, 15, 16)]
b2 = [(1, 0, 1), (0, 1, 0)]

# zip(a1, b1) pairs the column tuples group by group; zip(*g) then turns each
# pair into (a, b) rows, which are passed to append() as a generator.
simpleRecordSet = RecordSet(recordType='ab')
for g in zip(a1, b1):
    simpleRecordSet.append(v for v in zip(*g))

# Built the same way from a2 and b2.
simpleAddition = RecordSet(recordType='ab')
for g in zip(a2, b2):
    simpleAddition.append(v for v in zip(*g))