Exemplo n.º 1
0
    def unique_index(self):
        """Pick one ``sort_index`` entry per run reported by ``rl.encode``.

        Without bin counts, the result is ``self.sort_index`` indexed at the
        start offset of every run. With bin counts, the same selection is
        done on the flattened data with a per-bin id as an extra run key, and
        the result is wrapped in an ``Atom`` carrying rebuilt bin counts.

        NOTE(review): ``rl.encode`` is assumed to return run lengths (the
        ``cumsum(...) - ...`` arithmetic below treats it that way) -- confirm
        against the ``run_length`` module.
        """
        # NOTE(review): ``Element`` is not referenced below; the import is
        # kept in case it matters for its side effects -- confirm and drop.
        from element import Element
        if len(self.bincounts) == 0:
            sorted_atom = self.sorted
            bincounts = rl.encode(sorted_atom.mask, self.sorted)
            # Exclusive cumulative sum: offset of the first item of each run.
            offsets = np.cumsum(bincounts) - bincounts
            return self.sort_index[offsets]
        else:
            # Builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
            # releases no longer provide.
            bincounts = np.asarray(self.bincounts[0]).astype(int)
            # Zero the counts wherever the first bincounts level's mask is
            # false, so those bins contribute no ids below.
            bincounts[~self.bincounts[0].mask] = 0
            # One bin number per element, reordered by the flattened sort index.
            binid = np.repeat(np.arange(bincounts.size),
                              bincounts)[self.sort_index.flattened]
            sorted_flat = self.sorted.flattened

            array = sorted_flat[:]
            # Overwrite entries whose mask is false with 0 before encoding.
            array[~sorted_flat.mask] = 0
            bincounts = rl.encode(binid, sorted_flat.mask, array)
            offsets = np.cumsum(bincounts) - bincounts

            # Encode the bin ids of the selected entries; ``offsets2`` is the
            # inclusive end offset of each of those runs.
            bincounts2 = rl.encode(binid[offsets])
            offsets2 = np.cumsum(bincounts2) - 1

            # Scatter the per-bin counts back to their bin positions; bins
            # that were never selected stay at 0.
            bincounts3 = np.zeros(self.bincounts[0].size, int)
            bincounts3[binid[offsets][offsets2]] = bincounts2

            bincounts3 = [Atom(bincounts3, mask=self.bincounts[0].mask)]
            bincounts3.extend(self.bincounts[1:])
            return Atom(self.sort_index.flattened[offsets],
                        mask=sorted_flat.mask[offsets],
                        bincounts=bincounts3)
Exemplo n.º 2
0
class WordCountTests(unittest.TestCase):
    """Checks for the ``encode`` / ``decode`` pair on ASCII and Unicode input."""

    def test_encode(self):
        self.assertMultiLineEqual('2A3B4C', encode('AABBBCCCC'))

    def test_decode(self):
        self.assertMultiLineEqual('AABBBCCCC', decode('2A3B4C'))

    def test_encode_with_single(self):
        # Single characters are expected to appear without a count prefix.
        self.assertMultiLineEqual(
            '12WB12W3B24WB',
            encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB'))

    def test_decode_with_single(self):
        self.assertMultiLineEqual(
            'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB',
            decode('12WB12W3B24WB'))

    def test_combination(self):
        # Round trip: decoding an encoded string must give the input back.
        self.assertMultiLineEqual('zzz ZZ  zZ', decode(encode('zzz ZZ  zZ')))

    # A stray ``encode('zzz ZZ  zZ')`` call used to sit here in the class
    # body; it ran at class-definition time and its result was discarded.

    def test_encode_unicode_s(self):
        self.assertMultiLineEqual('⏰3⚽2⭐⏰', encode('⏰⚽⚽⚽⭐⭐⏰'))

    def test_decode_unicode(self):
        self.assertMultiLineEqual('⏰⚽⚽⚽⭐⭐⏰', decode('⏰3⚽2⭐⏰'))
Exemplo n.º 3
0
def group_index(self, *a):
    """Return an ``Atom`` of sort positions with one level of bin counts.

    The column names come from ``by(self, *a)``. The ``Atom`` wraps
    ``sort_index(self, ...)`` for those names, and its single bin-count level
    is ``rl.encode`` applied to the same columns taken from the sorted data.
    """
    from atom import Atom
    import run_length as rl

    key_names = by(self, *a)
    ordered = sort_by(self, *key_names)
    key_columns = getcolumns(ordered, *key_names)

    positions = sort_index(self, *key_names)
    run_counts = rl.encode(*key_columns)
    return Atom(positions, bincounts=[run_counts])
Exemplo n.º 4
0
def group_index(self, *a):
    """Build the grouped sort index for the columns selected by ``a``.

    ``by`` resolves the column names, ``sort_by`` orders the data by them,
    and ``rl.encode`` of the ordered key columns becomes the only bin-count
    level of the returned ``Atom``, which wraps ``sort_index`` of the
    unsorted data.
    """
    from atom import Atom
    import run_length as rl

    names = by(self, *a)
    columns_in_order = getcolumns(sort_by(self, *names), *names)

    index = sort_index(self, *names)
    counts = rl.encode(*columns_in_order)
    return Atom(index, bincounts=[counts])
Exemplo n.º 5
0
 def __init__(self, join, container, outer, axis, *keys):
     """Record the join settings and index ``container`` by ``keys``.

     Stores the container length, the owning ``join``, ``axis`` and
     ``outer``, then derives the key columns of the sorted container, their
     ``rl.encode`` result, the ``.first`` of each column grouped by that
     result, and the sort index grouped the same way.
     """
     self.size = len(container)
     self.join = join
     self.axis = axis
     self.outer = outer

     order = sort_index(container, *keys)
     ordered_container = container[order]
     self.columns = getcolumns(ordered_container, *keys)
     self.bin_counts = rl.encode(*self.columns)

     # One grouped Atom per key column; keep only its ``first`` attribute.
     firsts = []
     for key_column in self.columns:
         grouped = Atom(key_column, bincounts=[self.bin_counts])
         firsts.append(grouped.first)
     self.unique = firsts

     self.inverted_index = Atom(order, bincounts=[self.bin_counts])
Exemplo n.º 6
0
 def __init__(self, left, right, keys, left_outer=False, right_outer=False):
     """Set up both sides of the join and a combined index over their keys.

     Args:
         left: Container for the left side (axis 0).
         right: Container for the right side (axis 1).
         keys: Mapping; its values are passed as the key names of the left
             side and its keys as the key names of the right side.
         left_outer: Forwarded to the left ``join.Side`` as ``outer``.
         right_outer: Forwarded to the right ``join.Side`` as ``outer``.
     """
     self.left = join.Side(self, left, left_outer, 0, *keys.values())
     self.right = join.Side(self, right, right_outer, 1, *keys.keys())
     # Let each side reach its counterpart.
     self.left.other, self.right.other = self.right, self.left
     # Stack the unique key values of both sides, one array per key.
     # ``range`` (not ``xrange``) so this also runs on Python 3.
     joined_columns = [np.concatenate([self.left.unique[i], self.right.unique[i]])
                       for i in range(0, len(keys))]
     _sort_index = np.lexsort(joined_columns)
     _bin_counts = rl.encode(*[x[_sort_index] for x in joined_columns])
     # Per-side bin numbers (0..n-1 for left, then 0..m-1 for right), put in
     # the combined sort order and grouped by the combined encode result.
     self.side_indexes = Atom(np.concatenate([np.arange(self.left.bin_counts.size),
                                                np.arange(self.right.bin_counts.size)])[_sort_index],
                                bincounts=[_bin_counts])
Exemplo n.º 7
0
 def sort_index(self):
     """Return an ``Atom`` holding the indices that sort this atom's values.

     ``np.lexsort`` is given the mask and the values; when bin counts are
     present, a per-element bin id is added as the primary key so sorting
     happens inside each bin. The result carries the mask reordered by the
     sort index and, if present, the original bin counts.
     """
     array = self.asarray()
     # NOTE(review): this equality only matches dtypes that compare equal to
     # the generic ``np.datetime64`` -- confirm which datetime dtypes are
     # expected here before widening it; the string conversion below only
     # strips '-', ':' and ' '.
     if array.dtype == np.datetime64:
         sort_index = np.argsort(array)
         # Inverse permutation: maps sorted positions back to original ones.
         inverse_sort_index = np.empty(len(array), dtype=int)
         inverse_sort_index[sort_index] = np.arange(len(array))
         sorted_array = array[sort_index]
         bincounts = rl.encode(sorted_array)
         # Convert the first value of each run to an integer built from its
         # string form, repeat it over the run, and restore original order.
         array = np.repeat(
             np.array([
                 int(re.sub('[-: ]', '', str(x)))
                 for x in sorted_array[bincounts.cumsum() - bincounts]
             ]), bincounts)[inverse_sort_index]
     if len(self.bincounts) == 0:
         sort_index = np.lexsort([self.mask, array])
         return Atom(sort_index, mask=self.mask[sort_index])
     else:
         # Builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
         # releases no longer provide.
         bincounts = np.asarray(self.bincounts[0]).astype(int)
         # Zero the counts wherever the first bincounts level's mask is false.
         bincounts[~self.bincounts[0].mask] = 0
         binid = np.repeat(np.arange(bincounts.size), bincounts)
         # lexsort uses the last key as the primary one: bin id first, then
         # value, then mask.
         sort_index = np.lexsort([self.mask, array, binid])
         return Atom(sort_index,
                     mask=self.mask[sort_index],
                     bincounts=self.bincounts)
Exemplo n.º 8
0
 def test_encode(self):
     # 'AABBBCCCC' is expected to encode as '2A3B4C'.
     expected = "2A3B4C"
     actual = encode("AABBBCCCC")
     self.assertMultiLineEqual(expected, actual)
Exemplo n.º 9
0
 def test_encode_unicode_s(self):
     # Non-ASCII symbols: single symbols carry no count prefix.
     source = '⏰⚽⚽⚽⭐⭐⏰'
     expected = '⏰3⚽2⭐⏰'
     self.assertMultiLineEqual(expected, encode(source))
Exemplo n.º 10
0
 def test_combination(self):
     # Round trip: decode(encode(x)) must reproduce x.
     encoded = encode('zzz ZZ  zZ')
     round_tripped = decode(encoded)
     self.assertMultiLineEqual('zzz ZZ  zZ', round_tripped)
Exemplo n.º 11
0
 def test_encode_with_single(self):
     # Runs of length one are expected to appear without a count.
     source = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB'
     expected = '12WB12W3B24WB'
     self.assertMultiLineEqual(expected, encode(source))
Exemplo n.º 12
0
 def test_encode_with_single(self):
     # Mixed long runs and lone characters in one input.
     actual = encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB')
     self.assertMultiLineEqual('12WB12W3B24WB', actual)
Exemplo n.º 13
0
 def test_encode_unicode_s(self):
     # Encoding must work on non-ASCII symbols as well.
     actual = encode('⏰⚽⚽⚽⭐⭐⏰')
     self.assertMultiLineEqual('⏰3⚽2⭐⏰', actual)
Exemplo n.º 14
0
 def test_encode_unicode_s(self):
     # Expected output for a string made of non-ASCII symbols.
     expected = "⏰3⚽2⭐⏰"
     source = "⏰⚽⚽⚽⭐⭐⏰"
     self.assertMultiLineEqual(expected, encode(source))
Exemplo n.º 15
0
 def test_combination(self):
     # Encoding then decoding must give the original text back.
     restored = decode(encode("zzz ZZ  zZ"))
     self.assertMultiLineEqual("zzz ZZ  zZ", restored)
Exemplo n.º 16
0
 def test_encode_with_single(self):
     # Lone characters between long runs are expected to stay uncounted.
     source = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"
     expected = "12WB12W3B24WB"
     self.assertMultiLineEqual(expected, encode(source))
Exemplo n.º 17
0
 def unique_counts(self):
     """Return ``rl.encode`` applied to this object's ``sorted`` attribute."""
     ordered = self.sorted
     return rl.encode(ordered)
Exemplo n.º 18
0
from run_length import encode, decode

# Manual check: each actual result is printed right above the expected text.
for plain, expected in (
    ("AABBCCDDEEE", "2A2B2C2D3E"),
    ("AAABCCDDDDAAB", "3A1B2C4D2A1B"),
):
    print(encode(plain))
    print(expected)

print()
for packed, expected in (
    ("3F2A1B4D1C", "FFFAABDDDDC"),
    ("1A1B1A1B1A2B2A", "ABABABBAA"),
):
    print(decode(packed))
    print(expected)
Exemplo n.º 19
0
 def test_combination(self):
     # The same text goes in and must come out of the encode/decode round trip.
     text = 'zzz ZZ  zZ'
     packed = encode('zzz ZZ  zZ')
     self.assertMultiLineEqual(text, decode(packed))
Exemplo n.º 20
0
Arquivo: test.py Projeto: qpzm/PS
 def test_encode(self):
     # This encoder is expected to emit "<char><count>" for every run,
     # including runs of length one.
     actual = run_length.encode('HHHeellloWooorrrrlld!!')
     expected = 'H3e2l3o1W1o3r4l2d1!2'
     self.assertEqual(actual, expected)
Exemplo n.º 21
0
 def test_encode(self):
     # Three runs of lengths 2, 3 and 4.
     actual = encode('AABBBCCCC')
     self.assertMultiLineEqual('2A3B4C', actual)
Exemplo n.º 22
0
 def test_encode(self):
     # Basic case: every run is longer than one character.
     source = 'AABBBCCCC'
     expected = '2A3B4C'
     self.assertMultiLineEqual(expected, encode(source))