Exemplo n.º 1
0
def query(event: Dict[str, Any]) -> None:
    """Run the query portion of a QJ"""
    query_config = QueryConfig()
    logger = Logger()
    logger.info(event=QJLogEvents.InitConfig, config=query_config)

    records = event.get("Records", [])
    if not records:
        raise Exception("No records found")
    if len(records) > 1:
        raise Exception(
            f"More than one record. BatchSize is probably not 1. event: {event}"
        )
    body = records[0].get("body")
    if body is None:
        raise Exception(
            f"No record body found. BatchSize is probably not 1. event: {event}"
        )
    body = json.loads(body)
    job = schemas.Job(**body)
    logger.info(event=QJLogEvents.InitJob, job=job)

    logger.info(event=QJLogEvents.RunQueryStart)
    query_result = run_query(job=job, config=query_config)
    logger.info(event=QJLogEvents.RunQueryEnd,
                num_results=query_result.get_length())

    results: List[schemas.Result] = []
    if query_config.account_id_key not in query_result.query_result_set.fields:
        raise Exception(
            f"Query results must contain field '{query_config.account_id_key}'"
        )
    for q_r in query_result.to_list():
        account_id = q_r[query_config.account_id_key]
        result = schemas.Result(
            account_id=account_id,
            result={
                key: val
                for key, val in q_r.items()
                if key != query_config.account_id_key
            },
        )
        results.append(result)

    graph_spec = schemas.ResultSetGraphSpec(
        graph_uris_load_times=query_result.graph_uris_load_times)
    result_set = schemas.ResultSetCreate(job=job,
                                         graph_spec=graph_spec,
                                         results=results)

    api_key = get_api_key(region_name=query_config.region)
    qj_client = QJAPIClient(host=query_config.api_host,
                            port=query_config.api_port,
                            api_key=api_key)
    logger.info(event=QJLogEvents.CreateResultSetStart)
    qj_client.create_result_set(result_set=result_set)
    logger.info(event=QJLogEvents.CreateResultSetEnd)
Exemplo n.º 2
0
    def test_views_with_two_fresh_result_sets_one_missing_an_account_other_expired(
            self):
        """Add two result sets, one of which are within max_result_age_sec and the other is expired.
        The newer is missing one account.  Run a query against the latest view to validate we get
        data for only one account (from the unexpired set)"""
        db_ro_user = "******"
        job_crud = CRUDJob(
            db_ro_user=db_ro_user,
            result_expiration_sec_default=int(1e6),
            result_expiration_sec_limit=int(1e6),
            max_graph_age_sec_default=int(1e6),
            max_graph_age_sec_limit=int(1e6),
            max_result_age_sec_default=int(1e6),
            max_result_age_sec_limit=int(1e6),
            account_id_key="test_account_id",
        )
        result_set_crud = CRUDResultSet(
            max_result_set_results=int(1e6),
            max_result_size_bytes=int(1e6),
            job_crud=job_crud,
        )
        with temp_db_session() as session:
            session.execute(f"CREATE ROLE {db_ro_user}")
            job_create = schemas.JobCreate(
                name="test_job",
                description="A Test Job",
                graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
                category=schemas.Category.gov,
                severity=schemas.Severity.info,
                query=
                "select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
                max_graph_age_sec=int(1e6),
                result_expiration_sec=int(1e6),
                max_result_age_sec=int(1e6),
                notify_if_results=False,
            )
            created_timestamp = job_crud.create(
                db_session=session, job_create_in=job_create).created
            # activate
            job_update = schemas.JobUpdate(active=True)
            _job = job_crud.update_version(
                db_session=session,
                job_name="test_job",
                created=created_timestamp,
                job_update=job_update,
            )
            job = schemas.Job.from_orm(_job)

            account_id_a = "012345678901"
            account_id_b = "567890123456"

            result_set_1_time = datetime.now() - timedelta(
                seconds=job_create.max_result_age_sec + 1)
            result_set_1_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_1_time.timestamp()})
            results_1 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldhello_a",
                        "boo": "oldthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldboo_a",
                        "boo": "oldfoo_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldhello_b",
                        "boo": "oldthere_b"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldboo_b",
                        "boo": "oldfoo_b"
                    },
                ),
            ]
            result_set_1_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_1_graph_spec,
                results=results_1,
                created=result_set_1_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_1_create)

            result_set_2_time = datetime.now()
            result_set_2_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_2_time.timestamp()})
            results_2 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "newhello_a",
                        "boo": "newthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "newboo_a",
                        "boo": "newfoo_a"
                    },
                ),
            ]
            result_set_2_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_2_graph_spec,
                results=results_2,
                created=result_set_2_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_2_create)

            # check latest results
            latest_results = session.execute("select * from test_job_latest")
            latest_rows = latest_results.fetchall()
            self.assertSequenceEqual(
                sorted(latest_rows),
                sorted([
                    (result_set_2_time, account_id_a, "newhello_a",
                     "newthere_a"),
                    (result_set_2_time, account_id_a, "newboo_a", "newfoo_a"),
                ]),
            )
            # check all results
            all_results = session.execute("select * from test_job_all")
            all_rows = all_results.fetchall()
            self.assertSequenceEqual(
                sorted(all_rows),
                sorted([
                    (result_set_1_time, account_id_a, "oldhello_a",
                     "oldthere_a"),
                    (result_set_1_time, account_id_a, "oldboo_a", "oldfoo_a"),
                    (result_set_1_time, account_id_b, "oldhello_b",
                     "oldthere_b"),
                    (result_set_1_time, account_id_b, "oldboo_b", "oldfoo_b"),
                    (result_set_2_time, account_id_a, "newhello_a",
                     "newthere_a"),
                    (result_set_2_time, account_id_a, "newboo_a", "newfoo_a"),
                ]),
            )
Exemplo n.º 3
0
    def test_views_with_expired_result_set(self):
        """Add a single result set which is older than max_result_age_sec. Validate the latest
        view returns no results, also validate all_view"""
        db_ro_user = "******"
        job_crud = CRUDJob(
            db_ro_user=db_ro_user,
            result_expiration_sec_default=int(1e6),
            result_expiration_sec_limit=int(1e6),
            max_graph_age_sec_default=int(1e6),
            max_graph_age_sec_limit=int(1e6),
            max_result_age_sec_default=int(1e6),
            max_result_age_sec_limit=int(1e6),
            account_id_key="test_account_id",
        )
        result_set_crud = CRUDResultSet(
            max_result_set_results=int(1e6),
            max_result_size_bytes=int(1e6),
            job_crud=job_crud,
        )
        with temp_db_session() as session:
            session.execute(f"CREATE ROLE {db_ro_user}")
            job_create = schemas.JobCreate(
                name="test_job",
                description="A Test Job",
                graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
                category=schemas.Category.gov,
                severity=schemas.Severity.info,
                query=
                "select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
                max_graph_age_sec=int(1e6),
                result_expiration_sec=int(1e6),
                max_result_age_sec=int(1e6),
                notify_if_results=False,
            )
            created_timestamp = job_crud.create(
                db_session=session, job_create_in=job_create).created
            # activate
            job_update = schemas.JobUpdate(active=True)
            _job = job_crud.update_version(
                db_session=session,
                job_name="test_job",
                created=created_timestamp,
                job_update=job_update,
            )
            job = schemas.Job.from_orm(_job)

            account_id_a = "012345678901"
            account_id_b = "567890123456"

            result_set_1_time = datetime.now() - timedelta(
                seconds=job_create.max_result_age_sec + 1)
            result_set_1_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_1_time.timestamp()})
            results_1 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldhello_a",
                        "boo": "oldthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldboo_a",
                        "boo": "oldfoo_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldhello_b",
                        "boo": "oldthere_b"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldboo_b",
                        "boo": "oldfoo_b"
                    },
                ),
            ]
            result_set_1_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_1_graph_spec,
                results=results_1,
                created=result_set_1_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_1_create)

            # check latest results
            latest_results = session.execute("select * from test_job_latest")
            self.assertEqual(latest_results.rowcount, 0)
            # check all results
            all_results = session.execute("select * from test_job_all")
            all_rows = all_results.fetchall()
            self.assertSequenceEqual(
                sorted(all_rows),
                sorted([
                    (result_set_1_time, account_id_a, "oldhello_a",
                     "oldthere_a"),
                    (result_set_1_time, account_id_a, "oldboo_a", "oldfoo_a"),
                    (result_set_1_time, account_id_b, "oldhello_b",
                     "oldthere_b"),
                    (result_set_1_time, account_id_b, "oldboo_b", "oldfoo_b"),
                ]),
            )