Пример #1
0
async def get_thing(
    identifier: str,
    full: bool = False,
    format: isb_format.ISBFormat = isb_format.ISBFormat.ORIGINAL,
    session: Session = Depends(get_session),
):
    """Record for the specified identifier"""
    # NOTE(review): docstring intentionally left as-is; if this handler is
    # registered as a FastAPI route it is presumably surfaced in the API docs.

    # Look the record up first; an unknown identifier is reported as a 404.
    record = sqlmodel_database.get_thing_with_id(session, identifier)
    if record is None:
        raise fastapi.HTTPException(
            status_code=404, detail=f"Thing not found: {identifier}"
        )

    # Either the ``full`` flag or the FULL format returns the stored record
    # itself rather than a JSON response built from its content.
    wants_full_record = full or format == isb_format.ISBFormat.FULL
    if wants_full_record:
        return record

    if format == isb_format.ISBFormat.CORE:
        # CORE output is produced by a per-authority transformer applied to
        # the record's resolved content.
        core_transformers = {
            "SESAR": SESARTransformer,
            "GEOME": GEOMETransformer,
            "OPENCONTEXT": OpenContextTransformer,
            "SMITHSONIAN": SmithsonianTransformer,
        }
        source_authority = record.authority_id
        transformer_cls = core_transformers.get(source_authority)
        if transformer_cls is None:
            raise fastapi.HTTPException(
                status_code=400,
                detail=f"Core format not available for authority_id: {source_authority}",
            )
        payload = transformer_cls(record.resolved_content).transform()
    else:
        # Any other format: hand back the resolved content unchanged.
        payload = record.resolved_content

    return fastapi.responses.JSONResponse(
        content=payload, media_type=record.resolved_media_type
    )
async def _load_open_context_entries(session, max_count, start_from):
    """Iterate Open Context records and persist each one through ``session``.

    A record whose ``uri`` already has a stored thing is passed to
    ``update_thing`` and committed. Otherwise a new thing is built with
    ``load_thing``, added to the session and committed; an ``IntegrityError``
    on that path rolls the session back and is logged.

    Args:
        session: database session used for lookups, ``add`` and ``commit``.
        max_count: forwarded to the record iterator as ``max_entries``.
        start_from: forwarded to the record iterator as ``date_start``.
    """
    log = get_logger()
    adapter = isb_lib.opencontext_adapter
    records = adapter.OpenContextRecordIterator(
        max_entries=max_count, date_start=start_from, page_size=200
    )

    seen = 0
    for record in records:
        log.info("got next id from open context %s", record)
        seen += 1
        thing_id = record["uri"]
        known_thing = sqlmodel_database.get_thing_with_id(session, thing_id)

        if known_thing is None:
            # Unknown uri: build a fresh thing and try to store it.
            logging.debug("Don't have %s", thing_id)
            new_thing = adapter.load_thing(
                record, datetime.datetime.now(), records.last_url_str()
            )
            try:
                logging.debug("Going to add thing to session")
                session.add(new_thing)
                logging.debug("Added thing to session")
                session.commit()
                logging.debug("committed session")
            except sqlalchemy.exc.IntegrityError:
                session.rollback()
                logging.error("Item already exists: %s", record)
            continue

        # Known uri: refresh the stored thing from the incoming record.
        logging.info("Already have %s", thing_id)
        adapter.update_thing(
            known_thing, record, datetime.datetime.now(), records.last_url_str()
        )
        session.commit()
        logging.info("Just saved existing thing")

    log.info("total num records %d", seen)
Пример #3
0
def test_get_thing_with_id_thing(session: Session):
    """A thing that was added and committed can be fetched back by its id."""
    thing_id = "123456"
    thing_fields = {
        "id": thing_id,
        "authority_id": "test",
        "resolved_url": "http://foo.bar",
        "resolved_status": 200,
        "resolved_content": {"foo": "bar"},
    }
    session.add(Thing(**thing_fields))
    session.commit()

    fetched = get_thing_with_id(session, thing_id)
    assert fetched is not None
    assert fetched.primary_key is not None
    assert thing_id == fetched.id
Пример #4
0
def _save_record_to_db(session, file_path, record):
    """Store a Smithsonian record as a new thing unless its id is already known.

    Args:
        session: database session used for the lookup, ``add`` and ``commit``.
        file_path: passed through to ``load_thing`` as its third argument.
        record: mapping with at least an ``"id"`` key.
    """
    record_id = record["id"]
    logging.info("got next id from smithsonian %s", record_id)

    # Records that are already stored are only logged, never rewritten.
    if get_thing_with_id(session, record_id) is not None:
        logging.info("Already have %s", record_id)
        return

    logging.debug("Don't have %s", record_id)
    new_thing = isb_lib.smithsonian_adapter.load_thing(
        record, datetime.datetime.now(), file_path
    )
    try:
        logging.debug("Going to add thing to session")
        session.add(new_thing)
        logging.debug("Added thing to session")
        session.commit()
        logging.debug("committed session")
    except sqlalchemy.exc.IntegrityError:
        # Undo the failed insert so the session stays usable.
        session.rollback()
        logging.error("Item already exists: %s", record)
Пример #5
0
async def _loadSesarEntries(session, max_count, start_from=None):
    """Load SESAR things through a thread pool and store them via ``session``.

    Identifiers are read from a ``SESARIdentifiersSitemap`` iterator. Each one
    is submitted to the executor as a ``wrapLoadThing`` job (with the existing
    thing as an extra argument when one is already stored). Completed jobs are
    added to ``session`` and committed one at a time. A job whose result
    carries no thing is resubmitted until its retry counter reaches three.

    Args:
        session: database session used for lookups, ``add`` and ``commit``.
        max_count: maximum number of identifiers to request in this call; the
            loop also stops once this many jobs have completed.
        start_from: passed to the sitemap iterator as ``date_start``.
    """
    L = getLogger()
    futures = []
    working = {}  # igsn -> number of retries submitted so far
    ids = isb_lib.sesar_adapter.SESARIdentifiersSitemap(
        max_entries=countThings(session) + max_count, date_start=start_from
    )
    total_requested = 0
    total_completed = 0
    more_work = True
    num_prepared = BACKLOG_SIZE  # Number of jobs to prepare for execution
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=CONCURRENT_DOWNLOADS
    ) as executor:
        while more_work:
            # populate the futures list with work until the list is full
            # or there is no more work to get.
            while (
                len(futures) < BACKLOG_SIZE
                and total_requested < max_count
                and num_prepared > 0
            ):
                try:
                    _id = next(ids)
                    igsn = igsn_lib.normalize(_id[0])
                    existing_thing = sqlmodel_database.get_thing_with_id(
                        session, isb_lib.sesar_adapter.fullIgsn(igsn)
                    )
                    if existing_thing is not None:
                        logging.info("Already have %s at %s", igsn, existing_thing)
                        future = executor.submit(
                            wrapLoadThing, igsn, _id[1], existing_thing
                        )
                    else:
                        future = executor.submit(wrapLoadThing, igsn, _id[1])
                    futures.append(future)
                    working[igsn] = 0
                    total_requested += 1
                except StopIteration:
                    L.info("Reached end of identifier iteration.")
                    num_prepared = 0
                if total_requested >= max_count:
                    num_prepared = 0
            L.debug("%s", working)
            try:
                # Wait at most one second for results, then go back to
                # topping up the backlog.
                for fut in concurrent.futures.as_completed(futures, timeout=1):
                    igsn, tc, _thing = fut.result()
                    futures.remove(fut)
                    if _thing is not None:
                        try:
                            session.add(_thing)
                            session.commit()
                        except sqlalchemy.exc.IntegrityError:
                            session.rollback()
                            # Report the identifier of the job that failed,
                            # not the one most recently pulled from the
                            # sitemap iterator.
                            logging.error("Item already exists: %s", igsn)
                        # The same igsn may already have been popped (e.g. a
                        # duplicate identifier), so do not insist on the key.
                        working.pop(igsn, None)
                        total_completed += 1
                    else:
                        if working.get(igsn, 0) < 3:
                            working[igsn] = working.get(igsn, 0) + 1
                            L.info(
                                "Failed to retrieve %s. Retry = %s", igsn, working[igsn]
                            )
                            future = executor.submit(wrapLoadThing, igsn, tc)
                            futures.append(future)
                        else:
                            L.error("Too many retries on %s", igsn)
                            working.pop(igsn, None)
            except concurrent.futures.TimeoutError:
                # Nothing finished within the timeout; loop around.
                pass
            if len(futures) == 0 and num_prepared == 0:
                more_work = False
            if total_completed >= max_count:
                more_work = False
            L.info(
                "requested, completed, current = %s, %s, %s",
                total_requested,
                total_completed,
                len(futures),
            )
Пример #6
0
def test_get_thing_with_id_no_things(session: Session):
    """Looking up an id this test never stored yields ``None``."""
    assert get_thing_with_id(session, "12345") is None