def test_create_index_on_single_real_field(self):
    """Index a single ``float`` field and check every place it is recorded.

    Verifies the metadata stored in ``plume_master``, the column reported
    by ``table_info`` and the index reported by ``index_list``.
    """
    self.db.personas.create_index([
        ('size', float)
    ])

    # Use a single-quoted SQL string literal: a double-quoted token is an
    # identifier in SQL and SQLite only falls back to treating it as a
    # string when that legacy behaviour is enabled.
    row = self.db._connection.execute(
        'SELECT indexes FROM plume_master '
        "WHERE collection_name = 'personas';"
    ).fetchone()
    stored_metadata = json.loads(row[0])
    assert stored_metadata == {
        'indexes': [
            {
                'keys': [['size', 'REAL', 'ASC']],
                'name': 'personas_index_size'
            }
        ],
        'indexed_fields': ['size'],
        'formated_indexed_fields': ['"size"']
    }

    # The indexed field is expected as the third column of the table
    # (position 1 of a row is compared to the name, position 2 to the type).
    columns = table_info(self.db, 'personas')
    assert len(columns) == 3
    assert columns[2][1] == 'size'
    assert columns[2][2] == 'REAL'

    table_indexes = index_list(self.db, '"personas"')
    assert len(table_indexes) == 1
    assert table_indexes[0][1] == 'personas_index_size'
def test_create_index_on_nested_field(self):
    """Index a dotted (nested) ``str`` field and check where it is recorded.

    Verifies the metadata stored in ``plume_master``, the column reported
    by ``table_info`` and the index reported by ``index_list``; the dotted
    path is expected verbatim in the column and index names.
    """
    self.db.personas.create_index([
        ('meta.mastodon_profile', str)
    ])

    # Use a single-quoted SQL string literal: a double-quoted token is an
    # identifier in SQL and SQLite only falls back to treating it as a
    # string when that legacy behaviour is enabled.
    row = self.db._connection.execute(
        'SELECT indexes FROM plume_master '
        "WHERE collection_name = 'personas';"
    ).fetchone()
    stored_metadata = json.loads(row[0])
    assert stored_metadata == {
        'indexes': [
            {
                'keys': [
                    ['meta.mastodon_profile', 'TEXT', 'ASC']
                ],
                'name': 'personas_index_meta.mastodon_profile'
            }
        ],
        'indexed_fields': ['meta.mastodon_profile'],
        'formated_indexed_fields': ['"meta.mastodon_profile"']
    }

    # The indexed field is expected as the third column of the table
    # (position 1 of a row is compared to the name, position 2 to the type).
    columns = table_info(self.db, 'personas')
    assert len(columns) == 3
    assert columns[2][1] == 'meta.mastodon_profile'
    assert columns[2][2] == 'TEXT'

    table_indexes = index_list(self.db, '"personas"')
    assert len(table_indexes) == 1
    assert table_indexes[0][1] == (
        'personas_index_meta.mastodon_profile'
    )
def test_create_index_with_multiple_fields(self):
    """Create one compound index over a ``str`` and an ``int`` field.

    Verifies the metadata stored in ``plume_master``, the two columns
    reported by ``table_info`` and the single index reported by
    ``index_list``.
    """
    self.db.personas.create_index([
        ('name', str),
        ('age', int),
    ])

    # Use a single-quoted SQL string literal: a double-quoted token is an
    # identifier in SQL and SQLite only falls back to treating it as a
    # string when that legacy behaviour is enabled.
    row = self.db._connection.execute(
        'SELECT indexes FROM plume_master '
        "WHERE collection_name = 'personas';"
    ).fetchone()
    stored_metadata = json.loads(row[0])
    assert stored_metadata == {
        'indexes': [
            {
                'keys': [
                    ['name', 'TEXT', 'ASC'],
                    ['age', 'INTEGER', 'ASC'],
                ],
                'name': 'personas_index_name_age'
            }
        ],
        'indexed_fields': ['name', 'age'],
        'formated_indexed_fields': ['"name"', '"age"']
    }

    # Both indexed fields are expected after the two pre-existing columns,
    # in the order they were passed to create_index.
    columns = table_info(self.db, 'personas')
    assert len(columns) == 4
    assert columns[2][1] == 'name'
    assert columns[2][2] == 'TEXT'
    assert columns[3][1] == 'age'
    assert columns[3][2] == 'INTEGER'

    table_indexes = index_list(self.db, '"personas"')
    assert len(table_indexes) == 1
    assert table_indexes[0][1] == 'personas_index_name_age'
def get_column_map(top_line, bottom_line):
    """
    Match every column of ``top_line`` with a column of ``bottom_line``.

    Returns a dict of the form::

        map = {0: index0, 1: index1, ...}

    where ``map[i]`` is the index in the bottom line of the column that was
    paired with column ``i`` of the top line.

    To apply a column mapping easily, use apply_col_mapping(), but the idea
    is that you just say::

        a = top line
        b = bottom line
        map = get_column_map(a, b)
        b = [b[map[i]] for i in range(len(b))]

    and then b has the same column order as a.

    Column 0 (the energies) is never moved: it always maps to itself.
    The remaining top-line columns are handled in order, and each one takes
    the not-yet-used bottom-line column that comes first in the ordering
    produced by ``utils.index_list`` over the ``dist`` values (expected to
    be smallest distance first).

    top_line: list of floats
    bottom_line: list of floats

    Returns an empty dict when both lines are empty.

    Raises ValueError if the resulting mapping is not injective.
    """
    # ensure lines have same length, so really we're making a mapping from
    # top_line to a chunk of bottom line
    assert len(top_line) == len(bottom_line)

    n_cols = len(bottom_line)
    if n_cols == 0:
        # nothing to match; there is not even an energy column to pin
        return {}

    # start from the identity; entries 1.. are overwritten below
    mapping = {n: n for n in range(n_cols)}
    used_in_mapping = [False] * n_cols
    used_in_mapping[0] = True  # ignore zeroth column -- energies

    for i, a_i in enumerate(top_line):
        # skip 0th column (energies are fixed in place)
        if i == 0:
            continue

        # say distance to zeroth column is huge so we never pick energies
        # (it is also flagged as used, so it is skipped regardless)
        distances = [1000000] + [dist(a_i, b_j) for b_j in bottom_line[1:]]

        # candidate bottom-line indices, expected best match first; a plain
        # distances.index(x) lookup would only ever give the first index of
        # repeated values, hence the helper
        indices = utils.index_list(distances[:len(top_line)])

        # now take the best-ranked index that has not been used yet
        min_index = 0
        while used_in_mapping[indices[min_index]]:
            min_index += 1

        chosen = indices[min_index]
        mapping[i] = chosen
        used_in_mapping[chosen] = True

    # check map is valid (if it's not injective we're going to have a bad
    # time): duplicates collapse in the set, so the sizes differ
    if len(set(mapping.values())) != len(mapping):
        print(top_line)
        print(bottom_line)
        print(mapping)
        raise ValueError("somehow the column map is not injective...?")

    return mapping