def get_expired_result_sets(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
) -> Any:
    """Return the expired ResultSets reported by the CRUD layer.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.

    Returns:
        Whatever ``result_set_crud.get_expired`` returns for this session.
    """
    expired_result_sets = result_set_crud.get_expired(db_session=db_session)
    return expired_result_sets
def create_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_notifier: ResultSetNotifier = Depends(deps.result_set_notifier),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_in: schemas.ResultSetCreate,
) -> Any:
    """Persist a new ResultSet and, when applicable, send a notification.

    A notification is sent only when the stored ResultSet has results and the
    submitted job has ``notify_if_results`` set.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_notifier: Notifier used to publish the notification.
        result_set_crud: ResultSet CRUD object used to store the ResultSet.
        result_set_in: The ResultSet creation payload.

    Returns:
        The object returned by ``result_set_crud.create``.

    Raises:
        HTTPException: 400 when ``JobVersionNotFound``,
            ``ResultSetResultsLimitExceeded`` or ``ResultSizeExceeded`` is
            raised while creating or notifying.
    """
    try:
        stored = result_set_crud.create(db_session, obj_in=result_set_in)

        # Guard clause: nothing to announce unless there are results AND the
        # job opted in to notifications.
        if not (stored.results and result_set_in.job.notify_if_results):
            return stored

        notification = schemas.ResultSetNotification(
            job=result_set_in.job,
            graph_spec=result_set_in.graph_spec,
            created=result_set_in.created,
            num_results=len(result_set_in.results),
            result_set_id=str(stored.result_set_id),
        )
        result_set_notifier.notify(notification=notification)
        return stored
    except (
        JobVersionNotFound,
        ResultSetResultsLimitExceeded,
        ResultSizeExceeded,
    ) as bad_request:
        raise HTTPException(
            status_code=HTTP_400_BAD_REQUEST, detail=str(bad_request)
        ) from bad_request
def delete_expired_result_sets(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
) -> Any:
    """Delete the expired ResultSets and report the count returned by the CRUD layer.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.

    Returns:
        A ``schemas.ResultSetsPruneResult`` whose ``num_pruned`` is the value
        returned by ``result_set_crud.delete_expired``.
    """
    pruned_count = result_set_crud.delete_expired(db_session=db_session)
    prune_summary = schemas.ResultSetsPruneResult(num_pruned=pruned_count)
    return prune_summary
def result_set_crud() -> CRUDResultSet:
    """Build a CRUDResultSet configured from a fresh APIServiceConfig.

    Returns:
        A ``CRUDResultSet`` using the config's ``max_result_set_results`` and
        ``max_result_size_bytes`` limits and a job CRUD from ``job_crud()``.
    """
    config = APIServiceConfig()
    results_limit = config.max_result_set_results
    size_limit_bytes = config.max_result_size_bytes
    jobs = job_crud()
    return CRUDResultSet(
        max_result_set_results=results_limit,
        max_result_size_bytes=size_limit_bytes,
        job_crud=jobs,
    )
def create_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_in: schemas.ResultSetCreate,
) -> Any:
    """Create a ResultSet.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.
        result_set_in: The ResultSet creation payload.

    Returns:
        The object returned by ``result_set_crud.create``.

    Raises:
        HTTPException: 400 when the CRUD layer raises ``JobVersionNotFound``,
            ``ResultSetResultsLimitExceeded`` or ``ResultSizeExceeded``.
    """
    try:
        return result_set_crud.create(db_session, obj_in=result_set_in)
    except (
        JobVersionNotFound,
        ResultSetResultsLimitExceeded,
        ResultSizeExceeded,
    ) as ex:
        # Chain explicitly so the traceback shows the domain error as the
        # direct cause instead of "during handling ... another exception".
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail=str(ex)) from ex
def get_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_id: str,
) -> Any:
    """Get a ResultSet by id.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.
        result_set_id: Identifier of the ResultSet to fetch.

    Returns:
        The object returned by ``result_set_crud.get``.

    Raises:
        HTTPException: 404 when the CRUD layer raises ``ResultSetNotFound``.
    """
    try:
        return result_set_crud.get(db_session, result_set_id=result_set_id)
    except ResultSetNotFound as ex:
        # Chain explicitly so the lookup failure is recorded as the cause.
        raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex)) from ex
def get_job_latest_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    job_name: str,
) -> Any:
    """Fetch the most recent ResultSet for the active version of a Job.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.
        job_name: Name of the Job whose latest ResultSet is requested.

    Returns:
        The object returned by ``result_set_crud.get_latest_for_active_job``.

    Raises:
        HTTPException: 404 when the CRUD layer raises ``ResultSetNotFound``.
    """
    try:
        latest = result_set_crud.get_latest_for_active_job(db_session, job_name=job_name)
    except ResultSetNotFound as not_found:
        raise HTTPException(
            status_code=HTTP_404_NOT_FOUND, detail=str(not_found)
        ) from not_found
    return latest
def get_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_id: str,
    result_format: schemas.ResultSetFormat = schemas.ResultSetFormat.json,
) -> Any:
    """Fetch a ResultSet by id, optionally rendered as CSV.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.
        result_set_id: Identifier of the ResultSet to fetch.
        result_format: Requested output format; defaults to
            ``schemas.ResultSetFormat.json``.

    Returns:
        A ``text/csv`` ``Response`` when ``result_format`` is
        ``schemas.ResultSetFormat.csv``; otherwise the object returned by
        ``result_set_crud.get``.

    Raises:
        HTTPException: 404 when the CRUD layer raises ``ResultSetNotFound``.
    """
    try:
        found = result_set_crud.get(db_session, result_set_id=result_set_id)
    except ResultSetNotFound as not_found:
        raise HTTPException(
            status_code=HTTP_404_NOT_FOUND, detail=str(not_found)
        ) from not_found

    wants_csv = result_format == schemas.ResultSetFormat.csv
    if not wants_csv:
        return found

    csv_body = found.to_api_schema().to_csv()
    return Response(content=csv_body, media_type="text/csv")
def get_job_latest_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    job_name: str,
    result_format: schemas.ResultSetFormat = schemas.ResultSetFormat.json,
    response: Response,
    if_none_match: Optional[str] = Header(None),
) -> Any:
    """Get the latest result set of a Job, with cache validation headers.

    The ETag is the base64 encoding of ``str(result_set.created)``. When the
    ``If-None-Match`` header value equals that ETag, an empty 304 response is
    returned instead of the body.

    Args:
        db_session: Database session, injected via ``deps.db_session``.
        result_set_crud: ResultSet CRUD object, injected via
            ``deps.result_set_crud``.
        job_name: Name of the Job whose latest ResultSet is requested.
        result_format: Requested output format; defaults to
            ``schemas.ResultSetFormat.json``.
        response: Injected response object; headers set on it apply to the
            non-``Response`` return path.
        if_none_match: Value of the ``If-None-Match`` request header, if any.

    Returns:
        A 304 ``Response`` when the ETag matches ``if_none_match``; a
        ``text/csv`` ``Response`` when CSV is requested; otherwise the
        ResultSet object returned by the CRUD layer.

    Raises:
        HTTPException: 404 when the CRUD layer raises ``ResultSetNotFound``.
    """
    # Keep the try body to the one call that can raise ResultSetNotFound.
    try:
        result_set = result_set_crud.get_latest_for_active_job(db_session, job_name=job_name)
    except ResultSetNotFound as ex:
        raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex)) from ex

    etag = base64.b64encode(str(result_set.created).encode()).decode()
    cache_headers = {
        "Cache-Control": "public, must-revalidate, proxy-revalidate, max-age=30",
        "ETag": etag,
    }

    # Headers on the injected response cover the path that returns the
    # ResultSet object itself.
    for header_name, header_value in cache_headers.items():
        response.headers[header_name] = header_value

    # A Response returned directly is sent as-is; per the FastAPI docs the
    # headers set on the injected `response` are not merged into it. Pass the
    # cache headers explicitly so the 304 and CSV replies carry them too.
    if etag == if_none_match:
        return Response(status_code=HTTP_304_NOT_MODIFIED, headers=cache_headers)
    if result_format == schemas.ResultSetFormat.csv:
        return Response(
            content=result_set.to_api_schema().to_csv(),
            media_type="text/csv",
            headers=cache_headers,
        )
    return result_set
def test_views_with_two_fresh_result_sets_one_missing_an_account_other_expired(self):
    """Check the latest/all views with one aged-out and one current result set.

    The first result set is timestamped one second beyond max_result_age_sec
    and has rows for two accounts. The second is timestamped now and has rows
    for only one account. The latest view is expected to return just the
    second set's rows; the all view is expected to return every row of both.
    """
    db_ro_user = "******"
    big_limit = int(1e6)
    jobs = CRUDJob(
        db_ro_user=db_ro_user,
        result_expiration_sec_default=big_limit,
        result_expiration_sec_limit=big_limit,
        max_graph_age_sec_default=big_limit,
        max_graph_age_sec_limit=big_limit,
        max_result_age_sec_default=big_limit,
        max_result_age_sec_limit=big_limit,
        account_id_key="test_account_id",
    )
    result_sets = CRUDResultSet(
        max_result_set_results=big_limit,
        max_result_size_bytes=big_limit,
        job_crud=jobs,
    )

    account_id_a = "012345678901"
    account_id_b = "567890123456"
    # (account_id, foo, boo) triples; they drive both the inserted results
    # and the rows expected back from the views.
    old_rows = [
        (account_id_a, "oldhello_a", "oldthere_a"),
        (account_id_a, "oldboo_a", "oldfoo_a"),
        (account_id_b, "oldhello_b", "oldthere_b"),
        (account_id_b, "oldboo_b", "oldfoo_b"),
    ]
    new_rows = [
        (account_id_a, "newhello_a", "newthere_a"),
        (account_id_a, "newboo_a", "newfoo_a"),
    ]

    with temp_db_session() as session:
        session.execute(f"CREATE ROLE {db_ro_user}")
        job_create = schemas.JobCreate(
            name="test_job",
            description="A Test Job",
            graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
            category=schemas.Category.gov,
            severity=schemas.Severity.info,
            query="select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
            max_graph_age_sec=big_limit,
            result_expiration_sec=big_limit,
            max_result_age_sec=big_limit,
            notify_if_results=False,
        )
        created_timestamp = jobs.create(db_session=session, job_create_in=job_create).created

        # Activate the job version that was just created.
        activated_job = jobs.update_version(
            db_session=session,
            job_name="test_job",
            created=created_timestamp,
            job_update=schemas.JobUpdate(active=True),
        )
        job = schemas.Job.from_orm(activated_job)

        # First result set: timestamped just past max_result_age_sec.
        old_time = datetime.now() - timedelta(seconds=job_create.max_result_age_sec + 1)
        old_graph_spec = schemas.ResultSetGraphSpec(
            graph_uris_load_times={"test": old_time.timestamp()}
        )
        old_results = [
            schemas.Result(account_id=account_id, result={"foo": foo, "boo": boo})
            for account_id, foo, boo in old_rows
        ]
        result_sets.create(
            db_session=session,
            obj_in=ResultSetCreate(
                job=job,
                graph_spec=old_graph_spec,
                results=old_results,
                created=old_time,
            ),
        )

        # Second result set: timestamped now, account A only.
        new_time = datetime.now()
        new_graph_spec = schemas.ResultSetGraphSpec(
            graph_uris_load_times={"test": new_time.timestamp()}
        )
        new_results = [
            schemas.Result(account_id=account_id, result={"foo": foo, "boo": boo})
            for account_id, foo, boo in new_rows
        ]
        result_sets.create(
            db_session=session,
            obj_in=ResultSetCreate(
                job=job,
                graph_spec=new_graph_spec,
                results=new_results,
                created=new_time,
            ),
        )

        # check latest results
        latest_rows = session.execute("select * from test_job_latest").fetchall()
        expected_latest = [(new_time, *row) for row in new_rows]
        self.assertSequenceEqual(sorted(latest_rows), sorted(expected_latest))

        # check all results
        all_rows = session.execute("select * from test_job_all").fetchall()
        expected_all = [(old_time, *row) for row in old_rows] + [
            (new_time, *row) for row in new_rows
        ]
        self.assertSequenceEqual(sorted(all_rows), sorted(expected_all))
def test_views_with_expired_result_set(self):
    """Check the latest/all views when the only result set has aged out.

    A single result set timestamped one second beyond max_result_age_sec is
    stored. The latest view is expected to report zero rows, while the all
    view is expected to return every stored row.
    """
    db_ro_user = "******"
    big_limit = int(1e6)
    jobs = CRUDJob(
        db_ro_user=db_ro_user,
        result_expiration_sec_default=big_limit,
        result_expiration_sec_limit=big_limit,
        max_graph_age_sec_default=big_limit,
        max_graph_age_sec_limit=big_limit,
        max_result_age_sec_default=big_limit,
        max_result_age_sec_limit=big_limit,
        account_id_key="test_account_id",
    )
    result_sets = CRUDResultSet(
        max_result_set_results=big_limit,
        max_result_size_bytes=big_limit,
        job_crud=jobs,
    )

    account_id_a = "012345678901"
    account_id_b = "567890123456"
    # (account_id, foo, boo) triples; they drive both the inserted results
    # and the rows expected back from the all view.
    old_rows = [
        (account_id_a, "oldhello_a", "oldthere_a"),
        (account_id_a, "oldboo_a", "oldfoo_a"),
        (account_id_b, "oldhello_b", "oldthere_b"),
        (account_id_b, "oldboo_b", "oldfoo_b"),
    ]

    with temp_db_session() as session:
        session.execute(f"CREATE ROLE {db_ro_user}")
        job_create = schemas.JobCreate(
            name="test_job",
            description="A Test Job",
            graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
            category=schemas.Category.gov,
            severity=schemas.Severity.info,
            query="select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
            max_graph_age_sec=big_limit,
            result_expiration_sec=big_limit,
            max_result_age_sec=big_limit,
            notify_if_results=False,
        )
        created_timestamp = jobs.create(db_session=session, job_create_in=job_create).created

        # Activate the job version that was just created.
        activated_job = jobs.update_version(
            db_session=session,
            job_name="test_job",
            created=created_timestamp,
            job_update=schemas.JobUpdate(active=True),
        )
        job = schemas.Job.from_orm(activated_job)

        # The only result set: timestamped just past max_result_age_sec.
        old_time = datetime.now() - timedelta(seconds=job_create.max_result_age_sec + 1)
        old_graph_spec = schemas.ResultSetGraphSpec(
            graph_uris_load_times={"test": old_time.timestamp()}
        )
        old_results = [
            schemas.Result(account_id=account_id, result={"foo": foo, "boo": boo})
            for account_id, foo, boo in old_rows
        ]
        result_sets.create(
            db_session=session,
            obj_in=ResultSetCreate(
                job=job,
                graph_spec=old_graph_spec,
                results=old_results,
                created=old_time,
            ),
        )

        # check latest results
        latest_results = session.execute("select * from test_job_latest")
        self.assertEqual(latest_results.rowcount, 0)

        # check all results
        all_rows = session.execute("select * from test_job_all").fetchall()
        expected_all = [(old_time, *row) for row in old_rows]
        self.assertSequenceEqual(sorted(all_rows), sorted(expected_all))