def _make_groups(self, trajectory_categories, sort_category):
    r"""Build `self._groups` by grouping the sample ids of the metadata map

    For every category in `trajectory_categories`, `self._metadata_map` is
    grouped by the values found under that category and the sample ids
    (the index of each group) are stored as an ordered list.

    The outcome is stored in `self._groups`, replacing any previous value.
    It is a ``defaultdict(dict)`` laid out as::

        {category: {group_name: [sample_id, sample_id, ...]}}

    The order of the sample ids inside a group is the one produced by
    `signed_natsort` over ``(sort value, sample id)`` pairs, where the sort
    value is the sample's entry under `sort_category` when that argument is
    truthy, and the sample id itself otherwise.

    Parameters
    ----------
    trajectory_categories : list of str
        The metadata categories used to create the groups.
    sort_category : str or None
        The category from `self._metadata_map` whose values are used to
        order the samples of each group. Any falsy value (``None``, ``''``)
        means that the sample id is used instead.
    """
    # Pick the function that yields the value each sample is sorted by. The
    # metadata lookup happens per sample, at sorting time, not up front.
    if sort_category:
        def sort_value(sample_id):
            return self._metadata_map[sort_category][sample_id]
    else:
        def sort_value(sample_id):
            return sample_id

    self._groups = defaultdict(dict)

    for category in trajectory_categories:
        # One (group name, sub-table) pair per distinct value in `category`
        for group_name, group_df in self._metadata_map.groupby(category):
            keyed_ids = [(sort_value(sample_id), sample_id)
                         for sample_id in group_df.index]
            ordered = signed_natsort(keyed_ids)
            # Keep only the sample id of every (sort value, sample id) pair
            self._groups[category][group_name] = [pair[1]
                                                  for pair in ordered]
def _make_groups(self, trajectory_categories, sort_category):
    r"""Group the sample ids in `self._metadata_map` by category value

    Creates `self._groups` (overwriting any previous value), a
    ``defaultdict(dict)`` keyed by category. Each value is a dictionary
    whose keys are the group names found within that category and whose
    values are ordered lists of the sample ids belonging to the group.

    Sample ids are taken from the index of each group returned by
    ``self._metadata_map.groupby(category)`` and ordered by passing
    ``(sort value, sample id)`` pairs to `signed_natsort`. When
    `sort_category` is truthy the sort value is the sample's entry under
    that category in the metadata map; otherwise it is the sample id.

    Parameters
    ----------
    trajectory_categories : list of str
        A list of metadata categories to use to create the groups.
    sort_category : str or None
        The category from `self._metadata_map` used to sort the samples of
        each group. A falsy value (``None``, ``''``) sorts by sample id.
    """
    # Choose how the sort value of a sample is obtained. The lookup in the
    # metadata map is deferred until a sample is actually sorted.
    if sort_category:
        def value_for(sid):
            return self._metadata_map[sort_category][sid]
    else:
        def value_for(sid):
            return sid

    self._groups = defaultdict(dict)

    for category in trajectory_categories:
        grouped = self._metadata_map.groupby(category)
        for name, members in grouped:
            pairs = [(value_for(sid), sid) for sid in members.index]
            # Drop the sort value, keeping the sample ids in sorted order
            self._groups[category][name] = [
                entry[1] for entry in signed_natsort(pairs)]
def test_signed_sort(self):
    """Test correct sorting of different data types

    Exercises `signed_natsort` with an empty list, lists of
    ``(value, sample id)`` tuples, plain lists of strings and dictionaries
    (for which a sorted list of the keys is expected).

    Uses `assertEqual` throughout: the `assertEquals` alias is deprecated
    and no longer exists in `unittest.TestCase` as of Python 3.12.
    """
    # an empty list must be returned when an empty list needs to be sorted
    self.assertEqual(signed_natsort([]), [])

    # tuples that can be sorted by type-casting the first element
    test_list = [('9', 'SampleA'), ('-1', 'SampleD'), ('7', 'SampleC'),
                 ('-2', 'SampleE'), ('-0.11', 'SampleF'),
                 ('17.11', 'SampleB'), ('100', 'SampleG'),
                 ('13', 'SampleH')]
    expected_result = [('-2', 'SampleE'), ('-1', 'SampleD'),
                       ('-0.11', 'SampleF'), ('7', 'SampleC'),
                       ('9', 'SampleA'), ('13', 'SampleH'),
                       ('17.11', 'SampleB'), ('100', 'SampleG')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # tuples that must be sorted alphabetically
    test_list = [('Cygnus', 'SampleA'), ('Cepheus', 'SampleD'),
                 ('Auriga', 'SampleC'), ('Grus', 'SampleE'),
                 ('Hydra', 'SampleF'), ('Carina', 'SampleB'),
                 ('Orion', 'SampleG'), ('Lynx', 'SampleH')]
    expected_result = [('Auriga', 'SampleC'), ('Carina', 'SampleB'),
                       ('Cepheus', 'SampleD'), ('Cygnus', 'SampleA'),
                       ('Grus', 'SampleE'), ('Hydra', 'SampleF'),
                       ('Lynx', 'SampleH'), ('Orion', 'SampleG')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed case, tuples will be sorted alpha-numerically: as the expected
    # result shows, the negative value is not placed before the positive
    # ones once non-numeric values are present
    test_list = [('Cygnus', 'SampleA'), ('Cepheus', 'SampleD'),
                 ('Auriga', 'SampleC'), ('Grus', 'SampleE'),
                 ('-0.11', 'SampleF'), ('17.11', 'SampleB'),
                 ('100', 'SampleG'), ('Lynx', 'SampleH')]
    expected_result = [('17.11', 'SampleB'), ('100', 'SampleG'),
                       ('-0.11', 'SampleF'), ('Auriga', 'SampleC'),
                       ('Cepheus', 'SampleD'), ('Cygnus', 'SampleA'),
                       ('Grus', 'SampleE'), ('Lynx', 'SampleH')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed case just a list
    test_list = ['foo', 'bar', '-100', '12', 'spam', '4', '-1']
    expected_result = ['4', '12', '-1', '-100', 'bar', 'foo', 'spam']
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # list of elements that can be type-casted
    test_list = ['0', '1', '14', '12', '-15', '4', '-1']
    expected_result = ['-15', '-1', '0', '1', '4', '12', '14']
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed dict case: the sorted keys are expected back as a list
    test_dict = {'foo': 'a', 'bar': 'b', '-100': '1', '12': '11',
                 'spam': 'q', '4': '11', '-1': 'e'}
    expected_result = ['4', '12', '-1', '-100', 'bar', 'foo', 'spam']
    output = signed_natsort(test_dict)
    self.assertEqual(output, expected_result)

    # dict where the keys can be type-casted
    test_dict = {'0': 'foo', '1': 'bar', '14': 'stand', '12': 'eggs',
                 '-15': 'q', '4': 'b', '-1': 'h'}
    expected_result = ['-15', '-1', '0', '1', '4', '12', '14']
    output = signed_natsort(test_dict)
    self.assertEqual(output, expected_result)
def test_signed_sort(self):
    """Test correct sorting of different data types

    Checks the output of `signed_natsort` for an empty list, for lists of
    ``(value, sample id)`` tuples, for plain lists of strings and for
    dictionaries (where the expected output is a sorted list of the keys).

    Every comparison goes through `assertEqual`; the deprecated
    `assertEquals` alias was removed from `unittest.TestCase` in
    Python 3.12.
    """
    # an empty list must be returned when an empty list needs to be sorted
    self.assertEqual(signed_natsort([]), [])

    # tuples that can be sorted by type-casting the first element
    test_list = [('9', 'SampleA'), ('-1', 'SampleD'), ('7', 'SampleC'),
                 ('-2', 'SampleE'), ('-0.11', 'SampleF'),
                 ('17.11', 'SampleB'), ('100', 'SampleG'),
                 ('13', 'SampleH')]
    expected_result = [('-2', 'SampleE'), ('-1', 'SampleD'),
                       ('-0.11', 'SampleF'), ('7', 'SampleC'),
                       ('9', 'SampleA'), ('13', 'SampleH'),
                       ('17.11', 'SampleB'), ('100', 'SampleG')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # tuples that must be sorted alphabetically
    test_list = [('Cygnus', 'SampleA'), ('Cepheus', 'SampleD'),
                 ('Auriga', 'SampleC'), ('Grus', 'SampleE'),
                 ('Hydra', 'SampleF'), ('Carina', 'SampleB'),
                 ('Orion', 'SampleG'), ('Lynx', 'SampleH')]
    expected_result = [('Auriga', 'SampleC'), ('Carina', 'SampleB'),
                       ('Cepheus', 'SampleD'), ('Cygnus', 'SampleA'),
                       ('Grus', 'SampleE'), ('Hydra', 'SampleF'),
                       ('Lynx', 'SampleH'), ('Orion', 'SampleG')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed case, tuples will be sorted alpha-numerically: per the expected
    # result, the negative value no longer precedes the positive ones when
    # non-numeric values are present
    test_list = [('Cygnus', 'SampleA'), ('Cepheus', 'SampleD'),
                 ('Auriga', 'SampleC'), ('Grus', 'SampleE'),
                 ('-0.11', 'SampleF'), ('17.11', 'SampleB'),
                 ('100', 'SampleG'), ('Lynx', 'SampleH')]
    expected_result = [('17.11', 'SampleB'), ('100', 'SampleG'),
                       ('-0.11', 'SampleF'), ('Auriga', 'SampleC'),
                       ('Cepheus', 'SampleD'), ('Cygnus', 'SampleA'),
                       ('Grus', 'SampleE'), ('Lynx', 'SampleH')]
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed case just a list
    test_list = ['foo', 'bar', '-100', '12', 'spam', '4', '-1']
    expected_result = ['4', '12', '-1', '-100', 'bar', 'foo', 'spam']
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # list of elements that can be type-casted
    test_list = ['0', '1', '14', '12', '-15', '4', '-1']
    expected_result = ['-15', '-1', '0', '1', '4', '12', '14']
    output = signed_natsort(test_list)
    self.assertEqual(output, expected_result)

    # mixed dict case: the sorted keys are expected back as a list
    test_dict = {
        'foo': 'a',
        'bar': 'b',
        '-100': '1',
        '12': '11',
        'spam': 'q',
        '4': '11',
        '-1': 'e'
    }
    expected_result = ['4', '12', '-1', '-100', 'bar', 'foo', 'spam']
    output = signed_natsort(test_dict)
    self.assertEqual(output, expected_result)

    # dict where the keys can be type-casted
    test_dict = {
        '0': 'foo',
        '1': 'bar',
        '14': 'stand',
        '12': 'eggs',
        '-15': 'q',
        '4': 'b',
        '-1': 'h'
    }
    expected_result = ['-15', '-1', '0', '1', '4', '12', '14']
    output = signed_natsort(test_dict)
    self.assertEqual(output, expected_result)