def get_setup_entities(entity_names, data_files):
    """Create one loaded ``Entity`` per name from a directory or list of files.

    Args:
        entity_names: Names of the entities to create. Each name is paired
            positionally with one data file.
        data_files: Either a path to an existing directory (file names are
            then resolved through ``_auto_find_file_names``) or a ``list``
            of file paths.

    Returns:
        dict: Maps every entity name to its ``Entity``, after
        ``load_data_build_indices`` was called with the absolute path of
        the paired file.

    Raises:
        TypeError: If ``data_files`` is a string that is not a directory, or
            is neither a string nor a list.
        ValueError: If the number of names and the number of files differ.
    """
    invalid_msg = (
        "Invalid data_files given. data_files should be a directory path to files"
        " or list() of file paths themselves")

    if isinstance(data_files, str):
        if not os.path.isdir(data_files):
            raise TypeError(invalid_msg)
        # A directory was given: expand it into one full path per entity.
        directory = data_files
        data_files = [
            os.path.join(directory, found)
            for found in _auto_find_file_names(entity_names, directory)
        ]
    elif not isinstance(data_files, list):
        raise TypeError(invalid_msg)

    if len(data_files) != len(entity_names):
        raise ValueError(
            "Entity names and data files should be of same length")

    loaded = {}
    for entity_name, path in zip(entity_names, data_files):
        entity = Entity(entity_name)
        entity.load_data_build_indices(os.path.abspath(path))
        loaded[entity_name] = entity
    return loaded
def test_duplicate_primary_key(self):
    """Loading data that repeats a primary key value raises an error.

    Both data points carry ``_id`` 1, so loading the list is expected to
    raise ``DuplicatePrimaryKeyError`` with a message that mentions the
    duplicate primary key value.
    """
    duplicate_data = [{"_id": 1}, {"_id": 1}]
    entity = Entity("user")
    # The duplicate only exists across data points, so the whole list is
    # loaded in a single call rather than once per data point.
    with pytest.raises(DuplicatePrimaryKeyError) as error:
        entity.load_data_build_indices(duplicate_data)
    # Checked after the ``with`` block: a statement placed after the raising
    # call inside the block would never execute.
    assert "Duplicate primary key value: " in str(error.value)
def test_entity_invalid_file(self):
    """Loading from a path that does not exist raises ``FileNotFoundError``.

    Each candidate is a plain string handed to ``load_data_build_indices``;
    the test expects every one of them to be reported as a missing file.
    """
    entity = Entity("user")
    missing_paths = ("a", "nofile.txt", "{}", "set", "True", "None")
    for path in missing_paths:
        with pytest.raises(FileNotFoundError) as excinfo:
            entity.load_data_build_indices(path)
        message = str(excinfo.value)
        assert "[Errno 2] No such file or directory:" in message
def test_build_index_missing_primary_key(self):
    """Data points without the primary key raise ``PrimaryKeyNotFoundError``."""
    for data_without_pkey in ([{}], [{"url": "https://test.com"}]):
        ticket = Entity("ticket")
        with pytest.raises(PrimaryKeyNotFoundError):
            ticket.load_data_build_indices(data_without_pkey)
def test_build_pkey_index_unhashable(self):
    """Unhashable primary key values are indexed without raising.

    A dict, set or list used as ``_id`` must not cause a ``TypeError``; the
    expected primary key index is keyed by the string form of the value
    (``"{1: 1}"``, ``"{1}"``, ``"[1]"``).
    """
    # Each case pairs the data to load with the indices expected afterwards.
    cases = [
        ([{"_id": {1: 1}}], {"_id": {"{1: 1}": {"_id": {1: 1}}}}),
        ([{"_id": {1}}], {"_id": {"{1}": {"_id": {1}}}}),
        ([{"_id": [1]}], {"_id": {"[1]": {"_id": [1]}}}),
    ]
    for data, expected_indices in cases:
        entity = Entity("ticket")
        entity.load_data_build_indices(data)
        assert entity._indices == expected_indices
def test_build_load_invalid_data_type(self):
    """Unsupported data types raise ``TypeError`` with a fixed message.

    The inputs tried here are an int, a set, an empty tuple, a bool,
    ``None`` and an ``Entity`` instance; each is expected to be rejected
    with exactly the message below.
    """
    expected_message = (
        "Data to load should be one of file path as str(), "
        "data point as dict() or data as list of data point()")
    rejected_inputs = [1, {1}, (), True, None, Entity("user")]
    for candidate in rejected_inputs:
        ticket = Entity("ticket")
        with pytest.raises(TypeError) as excinfo:
            ticket.load_data_build_indices(candidate)
        assert expected_message == str(excinfo.value)
def get_entity_with_data_indices(entity_name):
    """Return an ``Entity`` loaded from its test data file.

    The file is looked up relative to this module's directory as
    ``test_data/test_data_import_<entity_name>s.json``.

    Args:
        entity_name (str): Name of the entity; it is also used to build the
            data file name. Expected to be one of user, organization,
            ticket.

    Returns:
        Entity: Entity named ``entity_name`` after
        ``load_data_build_indices`` was called with that file path.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    data_file_name = f"{module_dir}/test_data/test_data_import_{entity_name}s.json"
    loaded_entity = Entity(entity_name)
    loaded_entity.load_data_build_indices(data_file_name)
    return loaded_entity
def test_entity_invalid_json_structure(self, tmpdir):
    """Files holding malformed JSON raise ``ValueError`` when loaded.

    Every payload is written to the same temporary file, which is then
    loaded into a fresh ``Entity``.
    """
    malformed_payloads = (
        "{",
        "[}]",
        '{"_id":1 "2":2}',
        "",
        " ",
        "[",
        "nothing",
    )
    json_path = f"{tmpdir}/invalid_json.json"
    for payload in malformed_payloads:
        write_to_file(payload, json_path)
        user = Entity("user")
        with pytest.raises(ValueError):
            user.load_data_build_indices(json_path)
def test_custom_primary_key(self, tmpdir):
    """An ``Entity`` created with a custom primary key indexes on that key."""
    custom_key = "cid"
    data_path = f"{tmpdir}/custom_prim_key.json"
    write_to_file('[{"cid": 1}]', data_path)

    user = Entity("user", "cid")
    user.load_data_build_indices(data_path)

    assert custom_key == user.primary_key
    assert {"cid": {"1": {"cid": 1}}} == user._indices
def test_build_index_tags(self):
    """List-valued fields are expected to be indexed per element.

    Each element of ``tags`` is expected to map to the primary key of the
    data point; an empty list is expected under the empty-string key.
    """
    cases = (
        (
            [{"_id": 1, "tags": ["tag1", "tag2"]}],
            {
                "_id": {"1": {"_id": 1, "tags": ["tag1", "tag2"]}},
                "tags": {"tag1": [1], "tag2": [1]},
            },
        ),
        (
            [{"_id": 1, "tags": []}],
            {
                "_id": {"1": {"_id": 1, "tags": []}},
                "tags": {"": [1]},
            },
        ),
    )
    for data, expected in cases:
        ticket = Entity("ticket")
        ticket.load_data_build_indices(data)
        assert expected == ticket._indices
def test_entity_valid_data_in_file(self, tmpdir):
    """Valid JSON files produce the expected indices.

    An empty list ``[]`` is expected to leave only the empty primary key
    index. A bare ``{}`` is not covered here because it lacks the primary
    key.
    """
    # Maps the JSON text written to the file to the indices expected after
    # loading that file.
    expected_by_payload = {
        "[]": {"_id": {}},
        '{"_id": 1}': {"_id": {"1": {"_id": 1}}},
        '[{"_id": 1}]': {"_id": {"1": {"_id": 1}}},
        '[{"_id": 1, "d": 2}]': {
            "_id": {"1": {"_id": 1, "d": 2}},
            "d": {2: [1]},
        },
    }
    data_path = f"{tmpdir}/invalid_json.json"
    for payload, expected in expected_by_payload.items():
        write_to_file(payload, data_path)
        user = Entity("user")
        user.load_data_build_indices(data_path)
        assert expected == user._indices
def test_entity_valid_data_no_file(self, tmpdir):
    """Valid in-memory data produces the expected indices.

    Data is passed directly as a list of data points or as a single dict;
    an empty list is expected to leave only the empty primary key index.
    The ``tmpdir`` fixture is accepted but not used.
    """
    cases = (
        ([], {"_id": {}}),
        ({"_id": 1}, {"_id": {"1": {"_id": 1}}}),
        ([{"_id": 1}], {"_id": {"1": {"_id": 1}}}),
        (
            [{"_id": 1, "d": 2}],
            {"_id": {"1": {"_id": 1, "d": 2}}, "d": {2: [1]}},
        ),
    )
    for data, expected in cases:
        user = Entity("user")
        user.load_data_build_indices(data)
        assert expected == user._indices
def test_build_index_unhashable(self):
    """Unhashable values in non-primary-key fields raise ``TypeError``.

    A set and a dict are tried as field values; the error message is
    expected to contain "Unhashable value".
    """
    unhashable_cases = (
        [{"_id": 1, "unhash": set()}],
        [{"_id": 1, "tags": {}}],
    )
    for data in unhashable_cases:
        ticket = Entity("ticket")
        with pytest.raises(TypeError) as excinfo:
            ticket.load_data_build_indices(data)
        assert "Unhashable value" in str(excinfo.value)
def test_entity_missing_mandatory_key(self, tmpdir):
    """A file with any data point lacking ``_id`` raises an error.

    ``PrimaryKeyNotFoundError`` is expected, with a message that contains
    "Cannot find _id in the data point:".
    """
    payloads = (
        "{}",
        "[{}]",
        json.dumps({"url": "https://test.com"}),
        json.dumps([{"_id": 1}, {"url": "https://test.com"}]),
    )
    data_path = f"{tmpdir}/missing_id.json"
    for payload in payloads:
        write_to_file(payload, data_path)
        user = Entity("user")
        with pytest.raises(PrimaryKeyNotFoundError) as excinfo:
            user.load_data_build_indices(data_path)
        assert "Cannot find _id in the data point:" in str(excinfo.value)
def test_entity_struct(self):
    """A fresh ``Entity`` has the default state and the expected methods.

    Checks the default primary key ``_id``, an index containing only the
    empty primary key entry, empty data, and the presence of
    ``_build_indices``, ``load_data_build_indices`` and ``search``.
    """
    user = Entity("user")
    assert user.primary_key == "_id"
    assert user._indices == {"_id": {}}
    assert user._data == []
    for method_name in ("_build_indices", "load_data_build_indices", "search"):
        assert hasattr(user, method_name)
def test_build_index_valid_data(self):
    """Valid data builds the expected indices.

    An empty list is expected to leave only the empty primary key index;
    otherwise each field value is expected to map to the list of primary
    keys of the data points that hold it.
    """
    ticket_id = "436bf9b0-1147-4c0a-8439-6f79833bff5b"
    ticket_url = "http://initech.zendesk.com/api/v2/tickets/436bf9b0-1147-4c0a-8439-6f79833bff5b.json"
    external_id = "9210cdc9-4bee-485f-a078-35396cd74063"

    # Each case pairs the data to load with the indices expected afterwards.
    # Input and expected data points are built as separate dict objects.
    cases = (
        ([], {"_id": {}}),
        (
            [{"_id": 1, "name": "surya"}],
            {
                "_id": {"1": {"_id": 1, "name": "surya"}},
                "name": {"surya": [1]},
            },
        ),
        (
            [{"_id": 1, "name": "surya"}, {"_id": 2, "name": "surya"}],
            {
                "_id": {
                    "1": {"_id": 1, "name": "surya"},
                    "2": {"_id": 2, "name": "surya"},
                },
                "name": {"surya": [1, 2]},
            },
        ),
        (
            [{
                "_id": ticket_id,
                "url": ticket_url,
                "external_id": external_id,
            }],
            {
                "_id": {
                    ticket_id: {
                        "_id": ticket_id,
                        "url": ticket_url,
                        "external_id": external_id,
                    },
                },
                "url": {ticket_url: [ticket_id]},
                "external_id": {external_id: [ticket_id]},
            },
        ),
    )
    for data, expected in cases:
        ticket = Entity("ticket")
        ticket.load_data_build_indices(data)
        assert expected == ticket._indices
def test_build_index_blank_values(self):
    """Corner cases: blank strings and empty lists as field values.

    Covers an empty-string primary key, a single-space primary key, an
    empty ``tags`` list, and an empty-string primary key next to a regular
    field.
    """
    cases = (
        ([{"_id": ""}], {"_id": {"": {"_id": ""}}}),
        ([{"_id": " "}], {"_id": {" ": {"_id": " "}}}),
        (
            [{"_id": 1, "tags": []}],
            {"_id": {"1": {"_id": 1, "tags": []}}, "tags": {"": [1]}},
        ),
        (
            [{"_id": "", "name": "surya"}],
            {
                "_id": {"": {"_id": "", "name": "surya"}},
                "name": {"surya": [""]},
            },
        ),
    )
    for data, expected in cases:
        organization = Entity("organization")
        organization.load_data_build_indices(data)
        assert expected == organization._indices
def get_entity_from_formatted_data(entity_name, data):
    """Create an ``Entity`` named ``entity_name`` and load ``data`` into it.

    Args:
        entity_name: Name passed to the ``Entity`` constructor.
        data: Passed unchanged to ``Entity.load_data_build_indices``.

    Returns:
        Entity: The entity after ``load_data_build_indices(data)`` was called.
    """
    built = Entity(entity_name)
    built.load_data_build_indices(data)
    return built