예제 #1
0
    def test_create_index_on_single_real_field(self):
        """Check that a ``float`` index on ``size`` is recorded and applied.

        Verifies three things after ``create_index``:

        * the JSON stored in ``plume_master.indexes`` for the ``personas``
          collection describes one ascending ``REAL`` key on ``size``;
        * ``table_info`` reports a third column named ``size`` of type
          ``REAL``;
        * ``index_list`` reports exactly one index, named
          ``personas_index_size``.
        """
        self.db.personas.create_index([
            ('size', float)
        ])
        # The collection name is a SQL *string literal*, so it is written
        # with single quotes. Double quotes denote an identifier in SQL and
        # are only accepted as strings by SQLite's legacy DQS fallback.
        row = self.db._connection.execute(
            'SELECT indexes FROM plume_master '
            "WHERE collection_name = 'personas';"
        ).fetchone()
        index_metadata = json.loads(row[0])
        assert index_metadata == {
            'indexes': [
                {
                    'keys': [['size', 'REAL', 'ASC']],
                    'name': 'personas_index_size'
                }
            ],
            'indexed_fields': ['size'],
            'formated_indexed_fields': ['"size"']
        }

        # NOTE(review): rows are indexed as [1] = column name and
        # [2] = declared type, which matches PRAGMA table_info output --
        # confirm against the table_info helper.
        columns = table_info(self.db, 'personas')
        assert len(columns) == 3
        assert columns[2][1] == 'size'
        assert columns[2][2] == 'REAL'

        table_indexes = index_list(self.db, '"personas"')
        assert len(table_indexes) == 1
        assert table_indexes[0][1] == 'personas_index_size'
예제 #2
0
    def test_create_index_on_nested_field(self):
        """Check that a ``str`` index on a dotted (nested) field is applied.

        Verifies three things after ``create_index``:

        * the JSON stored in ``plume_master.indexes`` for the ``personas``
          collection describes one ascending ``TEXT`` key on
          ``meta.mastodon_profile``;
        * ``table_info`` reports a third column named
          ``meta.mastodon_profile`` of type ``TEXT``;
        * ``index_list`` reports exactly one index, named
          ``personas_index_meta.mastodon_profile``.
        """
        self.db.personas.create_index([
            ('meta.mastodon_profile', str)
        ])
        # The collection name is a SQL *string literal*, so it is written
        # with single quotes. Double quotes denote an identifier in SQL and
        # are only accepted as strings by SQLite's legacy DQS fallback.
        row = self.db._connection.execute(
            'SELECT indexes FROM plume_master '
            "WHERE collection_name = 'personas';"
        ).fetchone()
        index_metadata = json.loads(row[0])

        assert index_metadata == {
            'indexes': [
                {
                    'keys': [
                        ['meta.mastodon_profile', 'TEXT', 'ASC']
                    ],
                    'name': 'personas_index_meta.mastodon_profile'
                }
            ],
            'indexed_fields': ['meta.mastodon_profile'],
            'formated_indexed_fields': ['"meta.mastodon_profile"']
        }

        # NOTE(review): rows are indexed as [1] = column name and
        # [2] = declared type, which matches PRAGMA table_info output --
        # confirm against the table_info helper.
        columns = table_info(self.db, 'personas')
        assert len(columns) == 3
        assert columns[2][1] == 'meta.mastodon_profile'
        assert columns[2][2] == 'TEXT'

        table_indexes = index_list(self.db, '"personas"')
        assert len(table_indexes) == 1
        assert table_indexes[0][1] == (
            'personas_index_meta.mastodon_profile'
        )
예제 #3
0
    def test_create_index_with_multiple_fields(self):
        """Check that a compound index on ``name`` and ``age`` is applied.

        Verifies three things after ``create_index``:

        * the JSON stored in ``plume_master.indexes`` for the ``personas``
          collection describes one index with ascending keys ``name``
          (``TEXT``) and ``age`` (``INTEGER``), in that order;
        * ``table_info`` reports four columns, the last two being ``name``
          (``TEXT``) and ``age`` (``INTEGER``);
        * ``index_list`` reports exactly one index, named
          ``personas_index_name_age``.
        """
        self.db.personas.create_index([
            ('name', str),
            ('age', int),
        ])
        # The collection name is a SQL *string literal*, so it is written
        # with single quotes. Double quotes denote an identifier in SQL and
        # are only accepted as strings by SQLite's legacy DQS fallback.
        row = self.db._connection.execute(
            'SELECT indexes FROM plume_master '
            "WHERE collection_name = 'personas';"
        ).fetchone()
        index_metadata = json.loads(row[0])

        assert index_metadata == {
            'indexes': [
                {
                    'keys': [
                        ['name', 'TEXT', 'ASC'],
                        ['age', 'INTEGER', 'ASC'],
                    ],
                    'name': 'personas_index_name_age'
                }
            ],
            'indexed_fields': ['name', 'age'],
            'formated_indexed_fields': ['"name"', '"age"']
        }

        # NOTE(review): rows are indexed as [1] = column name and
        # [2] = declared type, which matches PRAGMA table_info output --
        # confirm against the table_info helper.
        columns = table_info(self.db, 'personas')
        assert len(columns) == 4
        assert columns[2][1] == 'name'
        assert columns[2][2] == 'TEXT'
        assert columns[3][1] == 'age'
        assert columns[3][2] == 'INTEGER'

        table_indexes = index_list(self.db, '"personas"')
        assert len(table_indexes) == 1
        assert table_indexes[0][1] == 'personas_index_name_age'
예제 #4
0
def get_column_map(top_line, bottom_line):
    """
    Match each column of ``top_line`` to a distinct column of ``bottom_line``.

    The result is a dict of the form::

        map = {0: index0, 1: index1, ...}

    where ``map[i]`` is the index into ``bottom_line`` of the column that was
    paired with column ``i`` of ``top_line``.

    To apply a column mapping easily, use apply_col_mapping()

    The idea is that you just say::

        a = top line

        b = bottom line

        map = get_column_map(a, b)

        b = [b[map[i]] for i in range(len(b))]

    and then b has the same column order as a.

    Matching is greedy: columns of ``top_line`` are processed left to right
    and each one takes the not-yet-used column of ``bottom_line`` that comes
    first in the ordering returned by ``utils.index_list`` (expected to be
    ascending distance). Column 0 (the energies) is never reordered, so
    ``map[0]`` is always 0.

    top_line:
        list of floats

    bottom_line:
        list of floats

    Returns an empty dict when both lines are empty.

    Raises AssertionError if the two lines differ in length, and ValueError
    if the resulting mapping is not injective.
    """
    # abbreviate so they're easier to type
    a = top_line
    b = bottom_line

    # ensure lines have same length, so really we're making a mapping from
    # top_line to a chunk of bottom line
    assert len(a) == len(b), (
        "top_line and bottom_line must have the same length"
    )

    n_columns = len(a)
    if n_columns == 0:
        # nothing to match; also avoids indexing used_in_mapping[0] below
        return {}

    # start from the identity map; entries 1.. are overwritten in the loop
    mapping = {n: n for n in range(n_columns)}
    used_in_mapping = [False] * n_columns
    used_in_mapping[0] = True  # ignore zeroth column -- energies

    for i, a_i in enumerate(a):
        # skip 0th column (energies are fixed in place)
        if i == 0:
            continue
        # say distance to zeroth column is huge so we never pick energies
        # (index 0 is also pre-marked as used, so it is skipped regardless)
        distances = [1000000] + [dist(a_i, b_j) for b_j in b[1:]]
        # indices are expected to be sorted by distance, smallest to largest,
        # and must contain every index below len(a); plain
        # distances.index(x) would not work here because it only ever
        # returns the first index of a repeated value
        indices = utils.index_list(distances)
        # now take the index with the minimum distance that has not been
        # used; exactly i columns are used at this point, so with a full
        # index list at least one free column remains
        min_index = 0
        while used_in_mapping[indices[min_index]]:
            min_index += 1
        mapping[i] = indices[min_index]
        used_in_mapping[indices[min_index]] = True

    # check map is valid (if it's not injective we're going to have a bad
    # time): an injective map has as many distinct outputs as it has keys
    if len(set(mapping.values())) != len(mapping):
        print(a)
        print(b)
        print(mapping)
        raise ValueError("somehow the column map is not injective...?")
    return mapping