Пример #1
0
    def test_bit_count1(self):
        """Exercise count_and, count_or and count_xor on small fixed inputs.

        Covers the expected counts, rejection of wrong argument counts and
        types, the operands being left unchanged by those calls, and
        rejection of operands that differ in length or in endianness.
        """
        counters = (count_and, count_or, count_xor)
        a = bitarray('001111')
        b = bitarray('010011')
        a_before, b_before = a.copy(), b.copy()

        for counter, expected in zip(counters, (2, 5, 3)):
            self.assertEqual(counter(a, b), expected)

        # Calls that must be rejected with TypeError: first the ones with a
        # wrong number of arguments, then the ones with wrong argument types.
        bad_calls = (
            (),
            (a,),
            (a, b, 3),
            (a, ''),
            ('1', b),
            (a, 4),
        )
        for counter in counters:
            for call_args in bad_calls:
                self.assertRaises(TypeError, counter, *call_args)

        # Both operands must still match the copies taken at the start.
        # assertEQUAL is not a stock unittest method; presumably a comparison
        # helper provided by the enclosing test class -- verify.
        self.assertEQUAL(a, a_before)
        self.assertEQUAL(b, b_before)

        # Growing b makes the operand lengths differ.
        b.append(1)
        for counter in counters:
            self.assertRaises(ValueError, counter, a, b)
            # Same length here, but one operand is big- and the other
            # little-endian.
            big = bitarray('110', 'big')
            little = bitarray('101', 'little')
            self.assertRaises(ValueError, counter, big, little)
Пример #2
0
 def test_bit_count_random(self):
     """Compare each count_* helper with counting the combined operands.

     Runs for every length from 0 to 49 plus one extra length drawn with
     randint(1000, 2000); the operands come from urandom(n).
     """
     lengths = list(range(50))
     lengths.append(randint(1000, 2000))
     for n in lengths:
         a, b = urandom(n), urandom(n)
         # Each helper is paired with the explicit bitwise combination it
         # is expected to agree with.
         checks = (
             (count_and, a & b),
             (count_or, a | b),
             (count_xor, a ^ b),
         )
         for counter, combined in checks:
             self.assertEqual(counter(a, b), combined.count())
Пример #3
0
 def test_count_byte(self):
     """Check the count_* helpers for each of the 256 single-byte patterns.

     Every pattern is combined with an all-zeros byte, an all-ones byte and
     itself; the expected results are derived from the pattern's own count
     of set bits.
     """
     zeros = bitarray(8)
     zeros.setall(0)
     ones = bitarray(8)
     ones.setall(1)
     for value in range(256):
         a = bitarray()
         a.frombytes(bytes(bytearray([value])))
         cnt = a.count()
         # (helper, second operand, expected count)
         expectations = (
             (count_and, zeros, 0),
             (count_and, ones, cnt),
             (count_and, a, cnt),
             (count_or, zeros, cnt),
             (count_or, ones, 8),
             (count_or, a, cnt),
             (count_xor, zeros, cnt),
             (count_xor, ones, 8 - cnt),
             (count_xor, a, 0),
         )
         for counter, other, expected in expectations:
             self.assertEqual(counter(a, other), expected)
Пример #4
0
 def test_bit_count2(self):
     """Compare each count_* helper with counting the combined operands.

     Operands are built from os.urandom() bytes and cut down to the wanted
     number of bits. Lengths 0 to 49 are covered, plus one extra length
     drawn with randint(1000, 2000).
     """
     def random_bits(nbits):
         # Load whole random bytes, then drop everything from index nbits on.
         bits = bitarray()
         bits.frombytes(os.urandom(bits2bytes(nbits)))
         del bits[nbits:]
         return bits

     sizes = list(range(50))
     sizes.append(randint(1000, 2000))
     for n in sizes:
         a = random_bits(n)
         b = random_bits(n)
         both = a & b
         self.assertEqual(count_and(a, b), both.count())
         either = a | b
         self.assertEqual(count_or(a, b), either.count())
         differing = a ^ b
         self.assertEqual(count_xor(a, b), differing.count())
Пример #5
0
 def test_bit_count_frozen(self):
     """Check that the count_* helpers work on frozenbitarray operands."""
     a = frozenbitarray('001111')
     b = frozenbitarray('010011')
     expected_counts = (
         (count_and, 2),
         (count_or, 5),
         (count_xor, 3),
     )
     for counter, expected in expected_counts:
         self.assertEqual(counter(a, b), expected)
Пример #6
0
def _collect_promising_nodes(degrees, colors, clique, excluded):
    """Return a deque of nodes that could still seed a clique bigger than `clique`.

    A node is queued when its degree exceeds ``clique.size``, it is not
    listed in `excluded`, and its color is not carried by any member of
    `clique`. Nodes are queued color by color, in the order in which
    ``np.setdiff1d`` returns the colors.
    """
    biggers = degrees > clique.size
    biggers[excluded] = False
    promising_colors = np.setdiff1d(colors[biggers], colors[clique])
    promising_nodes = deque()
    for color in promising_colors:
        promising_nodes.extend(((colors == color) & biggers).nonzero()[0])
    return promising_nodes


def main():
    """Run the whole BitQT workflow: load, cluster, write results.

    Steps, driven by the options returned by ``parse_arguments()``:

    1. Create ``args.outdir`` (it must not exist yet), load the trajectory,
       restrict it to ``args.selection`` and to the ``first`` / ``last`` /
       ``stride`` frame range, center it and build the binary matrix with
       ``calc_rmsd_matrix``.
    2. Repeatedly search for the biggest clique among the frames that are
       still unclustered, label its members with the current cluster number
       and erase them from the remaining rows of the matrix. The loop stops
       when the best clique is smaller than ``args.min_clust_size`` or when
       the cluster counter reaches ``args.nclust``.
    3. Write the pickled cluster array, the VMD script and the statistics
       files into ``args.outdir``.

    Raises:
        Exception: if ``args.outdir`` already exists.
    """
    # >>>> Debugging <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
    # To run without the command line, replace parse_arguments() below with:
    # import argparse
    # args = argparse.Namespace()
    # args.trajectory = '../trajs/aligned_tau.dcd'
    # args.topology = '../trajs/aligned_tau.pdb'
    # args.nclust = np.inf
    # args.min_clust_size = 2
    # args.first = 1000
    # args.last = 6000
    # args.stride = 3
    # args.selection = 'all'
    # args.cutoff = 4
    # args.outdir = 'bitQT_outputs'
    # =========================================================================
    # 1. Creating binary matrix (adjacency list)
    # =========================================================================
    # ++++ Get adjacency matrix of trajectory as list of bitarrays ++++++++++++
    args = parse_arguments()

    # Refuse to write into an existing directory so that earlier results are
    # never overwritten.
    try:
        os.makedirs(args.outdir)
    except FileExistsError as err:
        raise Exception('{} directory already exists. '.format(args.outdir) +
                        'Please specify another location or rename it.'
                        ) from err

    trajectory = load_raw_traj(args.trajectory, valid_trajs, args.topology)
    trajectory = shrink_traj_selection(trajectory, args.selection)
    # Frame count before the first/last/stride reduction; the output helpers
    # below receive it together with the range options.
    N1 = trajectory.n_frames
    trajectory = shrink_traj_range(args.first, args.last, args.stride, trajectory)
    trajectory.center_coordinates()
    matrix = calc_rmsd_matrix(trajectory, args)
    # ++++ Tracking clust/uNCLUSTERed bits to avoid re-computations +++++++++++
    # N is the bit length of one row, m the number of rows.
    # NOTE(review): the [:m] slices below suggest N can exceed m (padded
    # rows?) -- confirm against calc_rmsd_matrix.
    N = len(matrix[0])
    m = len(matrix)
    unclust_bit = ba(N)
    unclust_bit.setall(1)
    clustered_bit = unclust_bit.copy()
    clustered_bit.setall(0)
    # Template copied whenever the degrees are rebuilt from scratch.
    zeros = np.zeros(N, dtype=np.int32)
    # ++++ Save clusters in an array (1 .. N) +++++++++++++++++++++++++++++++++
    # 0 means "not assigned to any cluster".
    clusters_array = np.zeros(N, dtype=np.int32)
    NCLUSTER = 0
    clustered = set()
    nmembers = []
    # ++++ Coloring ordered vertices (1 .. N) +++++++++++++++++++++++++++++++++
    degrees = calc_matrix_degrees(unclust_bit, matrix)
    ordered_by_degs = degrees.argsort()[::-1]
    colors = colour_matrix(ordered_by_degs, matrix)

    # =========================================================================
    # 2. Main algorithm: BitQT !
    # =========================================================================
    while True:
        NCLUSTER += 1
        # ++++ Find a big clique early ++++++++++++++++++++++++++++++++++++++++
        big_node = degrees.argmax()
        bit_clique, big_clique = do_bit_cascade(big_node, degrees, colors,
                                                matrix, 0)
        big_clique_size = big_clique.size
        # ++++ Find promising nodes +++++++++++++++++++++++++++++++++++++++++++
        promising_nodes = _collect_promising_nodes(degrees, colors,
                                                   big_clique, big_clique)
        # ++++ Explore all promising nodes ++++++++++++++++++++++++++++++++++++
        # cum_found accumulates the members of every clique accepted so far
        # in this round; they are excluded from later candidate lists.
        cum_found = big_clique
        while promising_nodes:
            node = promising_nodes.popleft()
            try:
                bit_clique, clique = do_bit_cascade(node, degrees, colors,
                                                    matrix, big_clique_size)
                CLIQUE_SIZE = len(clique)
            except TypeError:
                # Presumably do_bit_cascade returns something that cannot be
                # unpacked / measured when no clique beats the size passed
                # in -- verify. Treated as "nothing found".
                CLIQUE_SIZE = 0
            # ++++ Cumulative update only if biggers candidates are found +++++
            if CLIQUE_SIZE > big_clique_size:
                big_node = node
                big_clique = clique
                big_clique_size = big_clique.size
                # ++++ Repeat previous condition ++++++++++++++++++++++++++++++
                cum_found = np.concatenate((cum_found, big_clique))
                promising_nodes = _collect_promising_nodes(
                    degrees, colors, big_clique, cum_found)
        nmembers.append(big_clique_size)

        # NOTE(review): when NCLUSTER reaches args.nclust the clique found in
        # this round is dropped without being stored -- confirm intended.
        if (big_clique_size < args.min_clust_size) or (NCLUSTER == args.nclust):
            break

        # ++++ Save new cluster & update NCLUSTER +++++++++++++++++++++++++++++
        clusters_array[big_clique] = NCLUSTER
        # ++++ Update (un)clustered_bit +++++++++++++++++++++++++++++++++++++++
        clustered.update(big_clique)
        clustered_bit = set_to_bitarray(clustered, N)
        unclust_bit = ~clustered_bit
        # ++++ Hard erasing of clustered frames from matrix +++++++++++++++++++
        degrees = zeros.copy()
        for x in unclust_bit[:m].itersearch(ba('1')):
            # NOTE(review): the degree is read before the row is masked just
            # below, so it can still count frames clustered in this round --
            # confirm an upper bound is what the next round expects.
            degrees[x] = matrix[x].count()
            if bu.count_and(matrix[x], clustered_bit):
                # Clear every bit of the row that is set in clustered_bit.
                matrix[x] &= (matrix[x] ^ clustered_bit)

    # =========================================================================
    # 3. Output
    # =========================================================================
    # saving pickle for api debugging tests
    outname = os.path.basename(args.topology).split('.')[0]
    pickle_to_file(clusters_array, os.path.join(args.outdir,
                                                '{}.pick'.format(outname)))
    # saving VMD visualization script
    to_VMD(args.outdir, args.topology, args.first, args.last, N1, args.stride,
           clusters_array[:m])
    # saving clustering info  files
    frames_stats = get_frames_stats(N1, args.first, args.last, args.stride,
                                    clusters_array[:m], args.outdir)
    cluster_stats = get_cluster_stats(clusters_array[:m], args.outdir)
    print('\n\nNormal Termination of BitQT :)')
Пример #7
0
def bitclusterize(matrix, degrees, args):
    '''
    DESCRIPTION
    Clusters the bit matrix using bitwise operations.

    At every iteration the row with the highest degree is taken as leader,
    the still-available bits of that row become a new cluster, and the
    degrees of the frames that remain available are recomputed against the
    available bits.

    Args:
        matrix (list): list of bitarrays
        degrees (collections.OrderedDict): dict of bitarrays lengths.
        args: options object; ``args.max_clust`` and ``args.minsize`` are
              read here.
    Return:
        clusters (numpy.ndarray): array of clusters ID (-1 marks frames that
                                  were not assigned to any cluster).
        leaders (list) : list of clusters' centers ID. A trailing -1 is
                         appended when the search stops on a candidate with
                         at most one member.
    '''
    degrees = np.asarray([degrees[x] for x in degrees])
    # Memory allocation for clusters container --------------------------------
    clusters = np.full(len(matrix), -1, dtype='int32')
    # Declare all bits as available -------------------------------------------
    available_bits = ba(len(degrees))
    # An int bit value is passed (not the string '1'), which is the value
    # type the bitarray API documents for setall().
    available_bits.setall(1)
    # Start iterative switching -----------------------------------------------
    leaders = []
    clust_id = 0
    ncluster = -1
    while True:
        ncluster += 1
        # Break 0: break if max number of cluster was reached -----------------
        # NOTE(review): ncluster starts at 0 and the test is ">", so up to
        # args.max_clust + 1 iterations can assign a cluster -- confirm.
        if ncluster > args.max_clust:
            break
        # Find the biggest cluster --------------------------------------------
        leader = degrees.argmax()
        # Break 1: all candidates cluster have degree 1 (can't clusterize) ----
        if degrees.sum() == np.nonzero(degrees)[0].size:
            break
        biggest_cluster = matrix[leader] & available_bits
        # Boolean mask with one entry per bit. The builtin ``bool`` is used
        # as dtype because the ``np.bool`` alias is missing from several
        # NumPy releases.
        biggest_cluster_list = np.frombuffer(biggest_cluster.unpack(),
                                             dtype=bool)
        # Break 2: all candidates cluster have degree < minsize ---------------
        if biggest_cluster_list.sum() < args.minsize:
            break
        # Break 3: No more candidates available (empty matrix) ----------------
        # With non-negative degrees this case is already caught by Break 1;
        # kept as a safeguard.
        if degrees.sum() == 0:
            break
        degrees[biggest_cluster_list] = 0
        # Remove the members of the new cluster from the available bits.
        available_bits = (available_bits ^ biggest_cluster) & available_bits
        if biggest_cluster.count() <= 1:
            leaders.append(-1)
            break
        else:
            leaders.append(leader)
            # Assign next cluster ID ------------------------------------------
            clusters[biggest_cluster_list] = clust_id
            clust_id += 1
        # Update degrees of unclustered frames --------------------------------
        # NOTE(review): itersearch() is reportedly gone from recent bitarray
        # releases in favor of search() -- check the pinned version.
        for degree in available_bits.itersearch(ba('1')):
            degrees[degree] = bau.count_and(available_bits, matrix[degree])
    return clusters, leaders