Exemplo n.º 1
0
    def test_alternating_order_insertion(self):
        '''Check the tree depth obtained for several insertion orders of the same values.'''

        # A tree holding n distinct values has at least floor(log2(n))+1 levels; for n=9 that is 4.
        values = np.array([5, 1, 2, 7, 8, 3, 4, 6, 9])
        count = np.size(values)

        # Inserting in the order the values are listed above
        self._test_tree_depth(values, 5)

        # Ascending insertion is the worst case: the expected depth equals the number of values
        ascending = sort(values)
        self._test_tree_depth(ascending, 9)

        # The alternating order does no better than the original list order
        alternating = ascending[alternating_order(count)]
        self._test_tree_depth(alternating, 5)

        # A random order guarantees nothing specific
        # (4 and 9 are presumably the min/max depth accepted by the helper - TODO confirm)
        shuffled = values[np.random.permutation(count)]
        self._test_tree_depth(shuffled, 4, 9)

        # The optimal insertion order reaches the minimum depth.
        # Sorting the list beforehand costs O(n log n).
        best = ascending[optimal_insertion_order(count)]
        self._test_tree_depth(best, 4)
        # A hand-written insertion order that also reaches the minimum depth
        self._test_tree_depth([8, 4, 2, 1, 3, 6, 5, 7, 9], 4)

        # The optimal order also works for a general list, provided all its entries are distinct.
        count = 100
        values = np.random.uniform(size=count)
        ascending = sort(values)
        expected_depth = 1 + int(np.floor(np.log2(count)))
        best = ascending[optimal_insertion_order(count)]
        self._test_tree_depth(best, expected_depth)

        # The same check, building the tree through the utility function
        tree = sequence_to_tree(ascending)
        assert_equal(tree.depth(), expected_depth, 'Wrong tree depth')
Exemplo n.º 2
0
 def __init__(self, data, snp, sample_id):
     '''
     Build a genotype set from its constituent arrays:
     - data: genotype data array. Axis 0 is read as the number of SNPs and axis 1 as the
       number of samples; a trailing allele axis is presumed (TODO confirm against callers).
     - snp: SNP metadata record array; only its 'base_pair' field is read here.
     - sample_id: IDs of the genotyped individuals, stored as given.
     '''
     # IDs of the genotyped individuals
     self.sample_id = sample_id

     # Genotype data and the sizes derived from its first two axes
     self.data = data
     dims = self.data.shape
     self._num_snps = dims[0]
     self._num_samples = dims[1]
     self._snp_range = None

     # SNP metadata record array (label, chromosome, genetic distance, base-pair location)
     self.snp = snp
     self._base_pair = self.snp['base_pair']

     # Inverse of the (SNP index -> base-pair location) mapping, built eagerly right here
     # (presumably base-pair location -> SNP index; see dict_invert)
     index_to_bp = dict(enumerate(self._base_pair))
     self._bp_to_snp = dict_invert(index_to_bp)

     # Binary search tree over the base-pair locations for fast bp queries; the locations
     # are inserted in the order given by optimal_insertion_order()
     insertion_order = optimal_insertion_order(self._num_snps)
     self._snp_tree = BinarySearchTree(values=self._base_pair[insertion_order])
     self._snp_index_tree = util.list_index_tree(self._base_pair)

     # Genetic map: meant to list, per SNP and in self.snp order, the two allele letters
     # that correspond to 1 and 2. Starts out empty.
     self.map = []
     # General metadata, for easy handling of CGI data. Starts out empty.
     self.metadata = []

     # Per-sample parent-of-origin phase flags, initially all zero
     sample_shape = (self._num_samples,)
     self.poo_phase = np.zeros(sample_shape, dtype=np.byte)