def test_key_field_already_exists_b():
    """
    Test that specifying a key when one already exists doesn't result in an error.

    (overwrite = True).
    """
    table_rows = read_csv("startrek.csv")
    # Should complete without raising, since overwrite is permitted.
    validate_csv(table_rows, key_field="name", overwrite=True)
def test_missing_key_field():
    """Test that missing key fields are handled properly."""
    table_rows = read_csv("startrek_no_key_field.csv")
    expected = UnsupportedTable().dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(table_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    assert len(reported) == 1
    assert reported[0] == expected
def test_invalid_key_field():
    """Test that specifying a missing key field results in an error."""
    table_rows = read_csv("startrek.csv")
    missing_key = "invalid"
    expected = KeyFieldDoesNotExist(key=missing_key).dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(table_rows, key_field=missing_key, overwrite=False)

    reported = excinfo.value.errors
    assert len(reported) == 1
    assert reported[0] == expected
def test_duplicate_keys():
    """Test that duplicate keys are handled properly."""
    table_rows = read_csv("clubs_invalid_duplicate_keys.csv")

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(table_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    expected = [DuplicateKey(key="2").dict(), DuplicateKey(key="5").dict()]
    for err in expected:
        assert err in reported
def create_aql_table(self, table: str, aql_query: str) -> Table:
    """Create a table in this workspace from an aql query."""
    if self.has_table(table):
        raise AlreadyExists("table", table)

    # In the future, the result of this validation can be
    # used to determine dependencies in virtual tables
    query_rows = list(self.run_query(aql_query))
    validate_csv(query_rows, "_key", False)

    new_table = self.create_table(table, False)
    new_table.insert(query_rows)
    return new_table
def test_invalid_headers():
    """Test that invalid headers are handled properly."""
    table_rows = read_csv("membership_invalid_syntax.csv")

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(table_rows, key_field="_key", overwrite=False)

    reported = excinfo.value.errors
    expected = [
        InvalidRow(row=3, columns=["_from"]).dict(),
        InvalidRow(row=4, columns=["_to"]).dict(),
        InvalidRow(row=5, columns=["_from", "_to"]).dict(),
    ]
    for err in expected:
        assert err in reported
def upload(workspace: str, table: str, key: str = "_key", overwrite: bool = False) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the field in the CSV data to use as the row key (defaults to `_key`)
    `overwrite` - whether an existing `_key` column may be replaced by `key`
    `data` - the CSV data, passed in the request body. If the CSV data contains
    `_from` and `_to` fields, it will be treated as an edge table.

    Raises `AlreadyExists` if the table exists, `CSVReadError` if the body is not
    parseable CSV, and whatever `validate_csv` raises on invalid data.
    """
    loaded_workspace = Workspace(workspace)

    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        rows: List[Dict[str, str]] = list(csv.DictReader(StringIO(body)))
    except csv.Error:
        raise CSVReadError()

    # Perform validation.
    validate_csv(rows, key, overwrite)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"  # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Check if it's an edge table or not. Guard against an empty upload so that
    # rows[0] doesn't raise an opaque IndexError.
    fieldnames = rows[0].keys() if rows else []
    edges = "_from" in fieldnames and "_to" in fieldnames

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edges)
    results = loaded_table.insert(rows)

    return {"count": len(results)}
def test_key_field_already_exists_a():
    """
    Test that specifying a key when one already exists results in an error.

    (overwrite = False)
    """
    table_rows = read_csv("startrek.csv")
    chosen_key = "name"
    expected = KeyFieldAlreadyExists(key=chosen_key).dict()

    with pytest.raises(ValidationFailed) as excinfo:
        validate_csv(table_rows, key_field=chosen_key, overwrite=False)

    reported = excinfo.value.errors
    assert len(reported) == 1
    assert reported[0] == expected
def test_missing_key_field():
    """Test that missing key fields are handled properly."""
    table_rows = read_csv("startrek_no_key_field.csv")
    expected = UnsupportedTable().dict()

    validation_errors = validate_csv(table_rows, key_field="_key", overwrite=False)

    # Exactly one error is reported, and it is the UnsupportedTable error.
    assert validation_errors == [expected]
def test_invalid_key_field():
    """Test that specifying a missing key field results in an error."""
    table_rows = read_csv("startrek.csv")
    missing_key = "invalid"
    expected = KeyFieldDoesNotExist(key=missing_key).dict()

    validation_errors = validate_csv(table_rows, key_field=missing_key, overwrite=False)

    # Exactly one error is reported, and it names the missing key field.
    assert validation_errors == [expected]
def test_duplicate_keys():
    """Test that duplicate keys are handled properly."""
    validation_errors = validate_csv(
        read_csv("clubs_invalid_duplicate_keys.csv"), key_field="_key", overwrite=False
    )

    for expected in (DuplicateKey(key="2").dict(), DuplicateKey(key="5").dict()):
        assert expected in validation_errors
def test_invalid_headers():
    """Test that invalid headers are handled properly."""
    validation_errors = validate_csv(
        read_csv("membership_invalid_syntax.csv"), key_field="_key", overwrite=False
    )

    expected = (
        InvalidRow(row=3, columns=["_from"]),
        InvalidRow(row=4, columns=["_to"]),
        InvalidRow(row=5, columns=["_from", "_to"]),
    )
    for invalid_row in expected:
        assert invalid_row.dict() in validation_errors
def test_key_field_already_exists_a():
    """
    Test that specifying a key when one already exists results in an error.

    (overwrite = False)
    """
    table_rows = read_csv("startrek.csv")
    chosen_key = "name"
    expected = KeyFieldAlreadyExists(key=chosen_key).dict()

    validation_errors = validate_csv(table_rows, key_field=chosen_key, overwrite=False)

    # Exactly one error is reported, and it names the pre-existing key field.
    assert validation_errors == [expected]
def upload(
    workspace: str,
    table: str,
    key: str = "_key",
    overwrite: bool = False,
    metadata: Optional[str] = None,
) -> Any:
    """
    Store a CSV file into the database as a node or edge table.

    `workspace` - the target workspace
    `table` - the target table
    `key` - the field in the CSV data to use as the row key (defaults to `_key`)
    `overwrite` - whether an existing `_key` column may be replaced by `key`
    `metadata` - optional JSON string describing the table's column metadata
    `data` - the CSV data, passed in the request body. If the CSV data contains
    `_from` and `_to` fields, it will be treated as an edge table.

    Raises `AlreadyExists` if the table exists, `CSVReadError` if the body is not
    parseable CSV, `BadQueryArgument` if `metadata` is not valid JSON, and
    `ValidationFailed` if the rows fail metadata or CSV validation.
    """
    loaded_workspace = Workspace(workspace)

    if loaded_workspace.has_table(table):
        raise AlreadyExists("table", table)

    app.logger.info("Bulk Loading")

    # Read the request body into CSV format
    body = decode_data(request.data)

    try:
        # Type to a Dict rather than an OrderedDict
        csv_rows: List[UnprocessedTableRow] = list(csv.DictReader(StringIO(body)))
    except csv.Error:
        raise CSVReadError()

    # TODO: This temporarily needs to be done here, so that validation of the metadata
    # can be done before the table is actually created. Once the API is updated, this
    # will change.
    # https://github.com/multinet-app/multinet-server/issues/493
    metadata_dict = {}
    if metadata:
        try:
            metadata_dict = json.loads(metadata)
        except json.decoder.JSONDecodeError:
            raise BadQueryArgument("metadata", metadata)

    table_metadata = table_metadata_from_dict(metadata_dict)
    rows, metadata_validation_errors = process_rows(csv_rows, table_metadata.columns)

    # Perform validation, surfacing metadata and CSV problems in one response.
    csv_validation_errors = validate_csv(rows, key, overwrite)
    validation_errors = [*metadata_validation_errors, *csv_validation_errors]
    if validation_errors:  # idiomatic truthiness check instead of len(...)
        raise ValidationFailed(errors=validation_errors)

    # Once we reach here, we know that the specified key field must be present,
    # and either:
    #   key == "_key"  # noqa: E800
    #   or key != "_key" and the "_key" field is not present
    #   or key != "_key" and "_key" is present, but overwrite = True
    if key != "_key":
        rows = set_table_key(rows, key)

    # Create table and insert the data
    loaded_table = loaded_workspace.create_table(table, edge=is_edge_table(rows))

    # Set table metadata
    loaded_table.set_metadata(metadata_dict)

    results = loaded_table.insert(rows)
    return {"count": len(results)}