def build_index(self, col_meta_data, docs, field):
    """Build and persist the index file for ``field`` of a collection.

    The index is a dict mapping each distinct value of ``field`` to the list
    of positions at which that value occurs. Positions are numbered over the
    documents that actually contain ``field`` (documents without it are
    filtered out *before* numbering), not over ``docs`` itself.

    Args:
        col_meta_data: collection metadata; supplies the collection name, the
            index file name and the index counter update.
        docs: iterable of documents (dicts) to index.
        field: name of the field to index.

    Returns:
        ``{'status': 'already existing'}`` when the index file is already on
        disk; otherwise the value returned by
        ``col_meta_data.add_or_update_index_count``.
    """
    pname = DatabaseContext.DATA_FOLDER + col_meta_data.collection + '/' + col_meta_data.get_index_fname(field)
    if os.path.exists(pname):
        # Never rebuild an index that is already present on disk.
        return {'status': 'already existing'}

    has_field = FilterTool({'$filter': {field: {'$exists': True}}})
    indexable_docs = [candidate for candidate in docs if has_field.match(candidate)]

    positions_by_value = {}
    for position, indexed_doc in enumerate(indexable_docs):
        positions_by_value.setdefault(indexed_doc[field], []).append(position)

    with open(pname, 'wb') as index_file:
        index_file.write(pickle.dumps(positions_by_value))

    # Drop any cached content for this path so readers see the new file.
    FilesReader.get_instance().invalidate_file_content(pname)
    return col_meta_data.add_or_update_index_count(field, len(indexable_docs))
def test_clean_deleted_items(self):
    """Check the state before and after the cleaning thread runs.

    Document 2 is replaced by an empty dict and a cleaning entry is pushed.
    Before ``CleaningThread().run()`` the document count is unchanged and the
    index still returns one line for id 2; afterwards the stack is empty, the
    count has dropped by one and the index returns no line.
    """
    def id_two_query():
        # A fresh dict per call, like the inline literals it stands for.
        return {'$filter': {'id': 2}}

    def indexed_lines_for_id_two():
        return self.indexes_service.find_all(
            col_meta_data, 'id', FilterTool(id_two_query()))

    def docs_matching_id_two(candidates):
        return self.search_service.find_in_docs(
            candidates, SearchContext(id_two_query()))

    col_meta_data = CollectionMetaData('col')
    count = len(self.data_service.find_all(col_meta_data, None))

    self.data_service.update(col_meta_data, [2], [{}])
    CleaningStack.get_instance().push(col_meta_data, {}, 1)

    # State while the clean-up is still pending.
    docs = self.data_service.find_all(col_meta_data, None)
    lines = indexed_lines_for_id_two()
    self.assertEqual(len(CleaningStack.get_instance().stack), 1)
    self.assertEqual(count, len(docs))
    self.assertEqual(len(docs_matching_id_two(docs)), 0)
    self.assertEqual(len(lines), 1)

    CleaningThread().run()

    # State once the clean-up has been executed.
    docs = self.data_service.find_all(col_meta_data, None)
    lines = indexed_lines_for_id_two()
    self.assertEqual(len(CleaningStack.get_instance().stack), 0)
    self.assertEqual(count - 1, len(docs))
    self.assertEqual(len(docs_matching_id_two(docs)), 0)
    self.assertEqual(len(lines), 0)
def test_update_indexes(self):
    """Re-keying an index entry from id 2 to id 20 keeps the same lines."""
    def lines_for_id(value):
        return self.indexes_service.find_all(
            col_meta_data, 'id', FilterTool({'$filter': {'id': value}}))

    col_meta_data = CollectionMetaData('col')
    all_docs = self.data_service.find_all(col_meta_data, None)
    self.indexes_service.build_index(col_meta_data, all_docs, 'id')

    lines = lines_for_id(2)

    # this updates the index information, not the document itself
    previous_docs = [{'id': 2}]
    replacement_docs = [{'id': 20}]
    self.indexes_service.update_indexes(col_meta_data, previous_docs, replacement_docs)

    new_lines = lines_for_id(20)

    self.assertEqual(len(new_lines), 1)
    self.assertListEqual(lines, new_lines)
def test_search_on_empty_doc(self):
    """A ``$not`` condition on ``first_name`` does not match an empty doc."""
    negated_query = {'$filter': {'$not': {'first_name': 'John'}}}
    matcher = FilterTool(negated_query)
    empty_doc = {}
    self.assertFalse(matcher.match(empty_doc))
def test_inner_doc(self):
    """A dotted key (``user.first_name``) matches a value in a nested dict."""
    matcher = FilterTool({'$filter': {'user.first_name': 'John'}})
    nested_doc = {'user': {'first_name': 'John', 'last_name': 'Smith'}}
    self.assertTrue(matcher.match(nested_doc))
def test_find_in_list(self):
    """A scalar condition matches when the document field is a list holding it."""
    matcher = FilterTool({'$filter': {'children': 'Junior'}})
    parent_doc = {
        'first_name': 'John',
        'last_name': 'Doe',
        'children': ['Junior', 'Mick'],
    }
    self.assertTrue(matcher.match(parent_doc))
def test_fail_or_condition_by_missing(self):
    """A list-form filter fails when the doc has none of the listed fields."""
    alternatives = [{'id': 15}, {'first_name': 'fn'}]
    matcher = FilterTool({'$filter': alternatives})
    unrelated_doc = {'last_name': 'ln'}
    self.assertFalse(matcher.match(unrelated_doc))
def test_fail_inner_doc(self):
    """A dotted key does not match when the nested value differs."""
    matcher = FilterTool({'$filter': {'user.last_name': 'Doe'}})
    nested_doc = {'user': {'first_name': 'John', 'last_name': 'Smith'}}
    self.assertFalse(matcher.match(nested_doc))
def test_or_condition(self):
    """A list-form filter matches when one of its alternatives matches."""
    alternatives = [{'id': 15}, {'first_name': 'fn'}]
    matcher = FilterTool({'$filter': alternatives})
    doc_matching_second = {'first_name': 'fn'}
    self.assertTrue(matcher.match(doc_matching_second))
def test_fail_regex(self):
    """``$reg`` with pattern ``.*nh$`` does not match first name 'John'."""
    pattern_condition = {'first_name': {'$reg': '.*nh$'}}
    matcher = FilterTool({'$filter': pattern_condition})
    person = {'first_name': 'John', 'last_name': 'Smith'}
    self.assertFalse(matcher.match(person))
def test_regex(self):
    """``$reg`` with pattern ``^Jo.*`` matches first name 'John'."""
    pattern_condition = {'first_name': {'$reg': '^Jo.*'}}
    matcher = FilterTool({'$filter': pattern_condition})
    person = {'first_name': 'John', 'last_name': 'Smith'}
    self.assertTrue(matcher.match(person))
def test_inner_dict_doesnt_not_exists_field(self):
    """``$exists: False`` does not match a document that has the field."""
    absence_condition = {'first_name': {'$exists': False}}
    matcher = FilterTool({'$filter': absence_condition})
    person = {'first_name': 'John', 'last_name': 'Doe'}
    self.assertFalse(matcher.match(person))
def test_fail_multiple_and_conditions(self):
    """A dict filter with two fields fails when only one of them matches."""
    both_names = {'first_name': 'fn', 'last_name': 'ln'}
    matcher = FilterTool({'$filter': both_names})
    doc_with_other_last_name = {
        'id': 1234,
        'first_name': 'fn',
        'last_name': 'ln2',
    }
    self.assertFalse(matcher.match(doc_with_other_last_name))
def __init__(self, raw_query):
    """Parse a raw query dict into the attributes used to run a search.

    Top-level keys of ``raw_query`` are lower-cased before being looked up,
    so ``'$Filter'`` and ``'$filter'`` are equivalent.

    Args:
        raw_query: mapping that may contain ``$filter``, ``$size``,
            ``$skip``, ``$sort`` and ``$map`` entries.

    Attributes set:
        filter: ``FilterTool`` built from the whole query, or ``None`` when
            no ``$filter`` is given.
        filter_keys: list of single-entry ``{field: condition}`` dicts, one
            per key of a dict-form ``$filter``. Always defined; empty when
            there is no ``$filter`` or when ``$filter`` is not a dict.
        size / skip: requested values, defaulting to
            ``DatabaseContext.DEFAULT_RESULTS_SIZE`` /
            ``DatabaseContext.DEFAULT_RESULTS_SKIP``.
        sort: ``SortTool`` built from ``$sort``, or ``None``.
        map: raw ``$map`` value, or ``None``.
    """
    query = {key.lower(): raw_query[key] for key in raw_query.keys()}

    # Defined unconditionally: previously the attribute did not exist at all
    # when the query carried no '$filter', so reading it raised
    # AttributeError.
    self.filter_keys = []
    if '$filter' in query:
        self.filter = FilterTool(query)
        conditions = query['$filter']
        # A list-form '$filter' has no .keys(); it used to crash here.
        # NOTE(review): list-form filters contribute no filter keys --
        # confirm consumers treat an empty list as "no usable index".
        if isinstance(conditions, dict):
            self.filter_keys = [
                {name: condition} for name, condition in conditions.items()
            ]
    else:
        self.filter = None

    self.size = query.get('$size', DatabaseContext.DEFAULT_RESULTS_SIZE)
    self.skip = query.get('$skip', DatabaseContext.DEFAULT_RESULTS_SKIP)
    self.sort = SortTool(query['$sort']) if '$sort' in query else None
    self.map = query.get('$map')
def test_update_value(self):
    """Updating document 2 changes what is stored, and can be reverted.

    After the first update the file still holds 3 documents and the new
    first name can be found; after restoring the original first name the
    same filter finds nothing.
    """
    data_file = 'data-test/col/data1.bin'

    renamed = {'id': 2, 'first_name': 'Joooooohn', 'last_name': 'Smith'}
    self.data_service.update(CollectionMetaData('col'), [2], [renamed])
    self.assertEqual(self.data_service.file_len(data_file), 3)

    filter_tool = FilterTool({'$filter': {'first_name': 'Joooooohn'}})
    found = self.data_service.find_one_in_file(data_file, filter_tool)
    self.assertEqual(found['id'], 2)
    self.assertEqual(found['first_name'], 'Joooooohn')

    restored = {'id': 2, 'first_name': 'John', 'last_name': 'Smith'}
    self.data_service.update(CollectionMetaData('col'), [2], [restored])
    found_after_restore = self.data_service.find_one_in_file(data_file, filter_tool)
    self.assertIsNone(found_after_restore)
def test_fail_inner_and_filter_condition(self):
    """A list filter with a nested dict ``$filter`` fails when nothing matches."""
    nested_conditions = {'last_name': 'ln', 'address': 'somewhere'}
    alternatives = [{'first_name': 'fn'}, {'$filter': nested_conditions}]
    matcher = FilterTool({'$filter': alternatives})
    non_matching_doc = {'last_name': 'ln2', 'address': 'somewhere2'}
    self.assertFalse(matcher.match(non_matching_doc))
def test_remove_doc_in_file(self):
    """Replacing document 6 by an empty dict makes it unfindable in its file."""
    update_reports = self.data_service.update(CollectionMetaData('col'), [6], [{}])
    first_report = update_reports[0]
    self.assertEqual(first_report, {'line': 5, 'doc': {}})

    filter_tool = FilterTool({'$filter': {'id': 6}})
    found = self.data_service.find_one_in_file('data-test/col/data2.bin', filter_tool)
    self.assertIsNone(found)
def test_inner_or_filter_condition(self):
    """A dict filter with a nested list ``$filter`` matches via one alternative."""
    nested_alternatives = [{'last_name': 'ln'}, {'address': 'somewhere'}]
    conditions = {'first_name': 'fn', '$filter': nested_alternatives}
    matcher = FilterTool({'$filter': conditions})
    doc_matching_address = {
        'first_name': 'fn',
        'last_name': 'ln2',
        'address': 'somewhere',
    }
    self.assertTrue(matcher.match(doc_matching_address))
def test_negate_expression(self):
    """``$not`` rejects the doc with the negated value and accepts another."""
    matcher = FilterTool({'$filter': {'$not': {'first_name': 'John'}}})

    john = {'last_name': 'Doe', 'first_name': 'John'}
    self.assertFalse(matcher.match(john))

    joe = {'last_name': 'Doe', 'first_name': 'Joe'}
    self.assertTrue(matcher.match(joe))
def update(self, col_meta_data, ids, input_docs):
    """Replace stored documents, matched by ``id``, with new content.

    ``ids[k]`` is paired with ``input_docs[k]``. Ids are consumed strictly in
    order while the data files are walked once, front to back: after an id is
    matched, the search for the next id continues from the following document
    and never goes back. NOTE(review): this appears to require ``ids`` to be
    given in storage order -- confirm against callers.

    A file is only rewritten when ``find_one_in_file`` reports that it
    contains the id being searched for at the moment the file is reached.

    Args:
        col_meta_data: collection metadata; supplies the collection name and
            the data file names.
        ids: sequence of document ids to replace.
        input_docs: replacement documents, parallel to ``ids``. Each one is
            passed through ``self.normalize`` before being stored.

    Returns:
        A list of ``{'line': <global line>, 'doc': <normalized doc>}`` dicts,
        one per replaced document, where the global line is the position in
        the file plus ``DatabaseContext.MAX_DOC_PER_FILE`` per preceding file.
        Empty when ``ids`` is empty.
    """
    updated = []
    if not ids:
        # Nothing to do; without this guard ids[0] below raised IndexError.
        return updated

    counter = 0
    line_counter = 0
    docs = None
    try:
        target_id = ids[counter]
        input_doc = input_docs[counter]
        counter += 1
        for fname in col_meta_data.enumerate_data_fnames(None):
            pname = DatabaseContext.DATA_FOLDER + col_meta_data.collection + '/' + fname
            results = self.find_one_in_file(
                pname, FilterTool({'$filter': {'id': target_id}}))
            docs = None
            if results is not None:
                docs = FilesReader.get_instance().get_file_content(pname)
                updated_docs = []
                for i, doc in enumerate(docs):
                    # Empty dicts are skipped by the truthiness check: they
                    # have no "id" key to compare.
                    if bool(doc) and doc["id"] == target_id:
                        normalized_doc = self.normalize([input_doc])
                        updated.append({
                            'line': i + line_counter,
                            'doc': normalized_doc[0]
                        })
                        updated_docs.extend(normalized_doc)
                        # Move on to the next requested id, if any. Once the
                        # ids are exhausted the last one stays current.
                        if counter < len(ids):
                            target_id = ids[counter]
                            input_doc = input_docs[counter]
                            counter += 1
                    else:
                        updated_docs.append(doc)
                FilesReader.get_instance().write_file_content(
                    pname, updated_docs)
            line_counter += DatabaseContext.MAX_DOC_PER_FILE
    except StopIteration:
        # Kept from the original: flush the file being processed if the walk
        # is interrupted by StopIteration.
        if docs is not None:
            FilesReader.get_instance().write_file_content(
                pname, updated_docs)
    return updated
def test_search_on_none_nested_docs(self):
    """``$none`` matches only when no nested doc satisfies the condition."""
    none_named_jym = {'family': {'$none': {'first_name': 'Jym'}}}
    matcher = FilterTool({'$filter': none_named_jym})

    family_without_jym = [
        {'first_name': 'Jack', 'last_name': 'Smith'},
        {'first_name': 'Joe', 'last_name': 'Smith'},
    ]
    self.assertTrue(
        matcher.match({
            'first_name': 'John',
            'last_name': 'Smith',
            'family': family_without_jym,
        }))

    family_with_jym = [
        {'first_name': 'Jack', 'last_name': 'Smith'},
        {'first_name': 'Jym', 'last_name': 'Smith'},
    ]
    self.assertFalse(
        matcher.match({
            'first_name': 'John',
            'last_name': 'Smith',
            'family': family_with_jym,
        }))
def search_by_thread(self, col_meta_data, search_context, thread_id):
    """Run a search for one thread, narrowing by an index when one applies.

    ``self.find_field_in_index`` is asked for an indexed condition. When it
    returns one, only the lines found through the index for its first key are
    loaded; otherwise every document visible to ``thread_id`` is loaded. The
    loaded documents are then passed to ``self.find_in_docs`` together with
    the full ``search_context``.

    Args:
        col_meta_data: collection metadata passed through to the services.
        search_context: parsed query.
        thread_id: forwarded to the data service calls.

    Returns:
        Whatever ``self.find_in_docs`` returns for the loaded documents.
    """
    indexed_value = self.find_field_in_index(col_meta_data, search_context)
    if indexed_value is not None:
        index_field = list(indexed_value.keys())[0]
        # filter by main index
        lines = self.indexes_service.find_all(
            col_meta_data, index_field, FilterTool({'$filter': indexed_value}))
        docs = self.data_service.find_by_line(col_meta_data, lines, thread_id)
    else:
        docs = self.data_service.find_all(col_meta_data, thread_id)
    return self.find_in_docs(docs, search_context)
def test_find_one_doc_in_file(self):
    """``find_one_in_file`` returns the document whose id is 3."""
    data_file = 'data-test/col/data1.bin'
    id_three = FilterTool({'$filter': {'id': 3}})
    found = self.data_service.find_one_in_file(data_file, id_three)
    self.assertEqual(found['id'], 3)
def test_fail_and_condition_by_missing(self):
    """An equality condition fails when the document lacks the field."""
    matcher = FilterTool({'$filter': {'id': 1234}})
    doc_without_id = {'some': 12345}
    self.assertFalse(matcher.match(doc_without_id))
def test_comparison_successfull(self):
    """``$gt: 40`` matches a document whose age is 50."""
    older_than_forty = {'age': {'$gt': 40}}
    matcher = FilterTool({'$filter': older_than_forty})
    fifty_year_old = {'id': 15, 'age': 50}
    self.assertTrue(matcher.match(fifty_year_old))
def test_and_condition(self):
    """An equality condition matches a document carrying the same value."""
    matcher = FilterTool({'$filter': {'id': 1234}})
    doc_with_id = {'id': 1234}
    self.assertTrue(matcher.match(doc_with_id))
def test_fail_comparison(self):
    """``$lt: 40`` does not match a document whose age is 50."""
    younger_than_forty = {'age': {'$lt': 40}}
    matcher = FilterTool({'$filter': younger_than_forty})
    fifty_year_old = {'id': 15, 'age': 50}
    self.assertFalse(matcher.match(fifty_year_old))
def test_fail_in_condition(self):
    """A list of accepted values fails when the doc value is not in it."""
    accepted_ids = [12, 13, 14]
    matcher = FilterTool({'$filter': {'id': accepted_ids}})
    doc_outside_list = {'id': 15}
    self.assertFalse(matcher.match(doc_outside_list))
def test_in_condition(self):
    """A list of accepted values matches when the doc value is one of them."""
    accepted_ids = [12, 13, 14]
    matcher = FilterTool({'$filter': {'id': accepted_ids}})
    doc_inside_list = {'id': 13}
    self.assertTrue(matcher.match(doc_inside_list))