def update_test_strip(db: DatabaseSession, document: dict):
    """
    Resolve the test strip barcode held in *document* to a known identifier.

    The ``test_strip`` entry of *document* is replaced in place by a dict with
    ``uuid`` and ``barcode`` keys.  The ``uuid`` is only filled in when the
    barcode matches an identifier from one of the expected «test-strips»
    identifier sets; otherwise a warning is logged and ``uuid`` stays ``None``.
    """
    scanned_barcode = document["test_strip"]
    match = find_identifier(db, scanned_barcode)

    # Start from the "unresolved" shape; only a fully valid match replaces it.
    document["test_strip"] = {"uuid": None, "barcode": scanned_barcode}

    if not match:
        LOG.warning(f"Test strip has unknown barcode «{scanned_barcode}»")
        return

    if match.set_name not in expected_identifier_sets["test-strips"]:
        LOG.warning(
            f"Test strip barcode found in unexpected identifier set «{match.set_name}»"
        )
        return

    document["test_strip"] = {
        "uuid": match.uuid,
        "barcode": match.barcode,
    }
def process_samples(db: DatabaseSession, encounter_id: int, document: dict):
    """
    Process the samples listed in an enrollment *document*.

    For every entry of ``document["sampleCodes"]`` that is a ``ClinicSwab``
    with a known collection barcode, upsert a skeletal sample record holding
    just the collection identifier linked back to *encounter_id*.  Entries
    without a barcode, of another type, or with an unknown barcode are
    skipped with a warning.

    Sample manifests generated by the processing lab will usually be loaded
    later and fill in the rest of the sample record.
    """
    for collected in document["sampleCodes"]:
        barcode = collected.get("code")

        if not barcode:
            LOG.warning("Skipping collected sample with no barcode")
            continue

        sample_type = collected["type"]

        # XXX TODO: Stop hardcoding this and handle other types.
        # ScannedSelfSwab and ManualSelfSwabbed are kit barcodes,
        # not collection barcodes.  TestStrip is an identifier
        # UUID, not barcode.
        #   - trs, 17 May 2019
        if sample_type != "ClinicSwab":
            LOG.warning(
                f"Skipping collected sample with unknown type {sample_type}"
            )
            continue

        LOG.debug(f"Looking up collected sample code «{barcode}»")
        collection = find_identifier(db, barcode)

        if not collection:
            LOG.warning(
                f"Skipping collected {sample_type} sample with unknown barcode «{barcode}»"
            )
            continue

        assert collection.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS, \
            f"{sample_type} sample with unexpected «{collection.set_name}» barcode «{barcode}»"

        # XXX TODO: Relationally model sample type after we choose
        # a standard vocabulary (LOINC or SNOMED or whatever FHIR
        # normalizes?)
        #   -trs, 8 May 2019
        upsert_sample(
            db,
            update_identifiers=False,
            overwrite_collection_date=False,
            identifier=None,
            collection_identifier=collection.uuid,
            collection_date=None,
            encounter_id=encounter_id,
            additional_details={"type": sample_type},
        )
def sample_identifier(db: DatabaseSession, barcode: str) -> Optional[str]:
    """
    Look up the scanned sample *barcode* within warehouse.identifier.

    Returns the UUID of the matching identifier, or ``None`` when the barcode
    matches nothing.  A matching identifier must belong to the «samples» set,
    otherwise an :class:`AssertionError` is raised.
    """
    match = find_identifier(db, barcode)

    if not match:
        return None

    assert match.set_name == "samples", \
        f"Identifier found in set «{match.set_name}», not «samples»"

    return match.uuid
def sample_identifier(db: DatabaseSession, barcode: str) -> Optional[str]:
    """
    Find corresponding UUID for scanned sample or collection barcode within
    warehouse.identifier.

    Will be sample barcode if from UW and collection barcode if from SCH.

    Returns the UUID of the matching identifier, or ``None`` when the barcode
    matches nothing.  Raises :class:`AssertionError` when the identifier
    belongs to a set other than «samples» or «collections-seattleflu.org».
    """
    accepted_sets = ("samples", "collections-seattleflu.org")

    match = find_identifier(db, barcode)

    if not match:
        return None

    # The message names every accepted set so a failure is not misleading.
    assert match.set_name in accepted_sets, \
        f"Identifier found in set «{match.set_name}», " \
        "not «samples» or «collections-seattleflu.org»"

    return match.uuid
def sample_identifier(db: DatabaseSession, document: dict) -> Optional[str]:
    """
    Resolve the ``barcode`` entry of *document* to an identifier UUID within
    warehouse.identifier.

    Returns ``None`` when *document* has no (or an empty) barcode, or when the
    barcode matches nothing.  A matching identifier must belong to the
    «collections-seattleflu.org» set, otherwise an :class:`AssertionError` is
    raised.
    """
    expected_set = 'collections-seattleflu.org'

    barcode = document.get('barcode')
    if not barcode:
        return None

    match = find_identifier(db, barcode)
    if not match:
        return None

    assert match.set_name == expected_set, \
        f"Identifier found in set «{match.set_name}», not «{expected_set}»"

    return match.uuid
def etl_manifest(*, db: DatabaseSession):
    """
    Load unprocessed receiving.manifest records into the warehouse as samples.

    Each record is handled inside its own savepoint.  The record's sample
    barcode (required) and collection barcode (optional) are resolved to
    identifiers; records with an unknown barcode are marked as skipped, while
    identifiers from an unexpected set fail an assertion.  The remaining
    document is stored as the sample's details.
    """
    LOG.debug(f"Starting the manifest ETL routine, revision {REVISION}")

    # XXX TODO: Stop hardcoding valid identifier sets.  Instead, accept them as
    # an option or config (and validate option choices against what's actually
    # in the database).  We won't want to validate using click.option(),
    # because that would necessitate a database connection simply to run
    # bin/id3c at all.
    #   -trs, 13 May 2019
    expected_identifier_sets = {
        "samples": {"samples"},
        "collections": {
            "collections-environmental",
            "collections-fluathome.org",
            "collections-household-intervention",
            "collections-household-intervention-asymptomatic",
            "collections-household-observation",
            "collections-household-observation-asymptomatic",
            "collections-kiosks",
            "collections-kiosks-asymptomatic",
            "collections-seattleflu.org",
            "collections-swab&send",
            "collections-swab&send-asymptomatic",
            "collections-self-test",
            "collections-scan",
            "collections-haarvi",
            "samples-haarvi",
        },
        "rdt": {"collections-fluathome.org"},
    }

    # Fetch and iterate over samples that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same samples.
    LOG.debug("Fetching unprocessed manifest records")

    manifest = db.cursor("manifest")
    processed_marker = Json([{ "etl": ETL_NAME, "revision": REVISION }])
    manifest.execute(""" select manifest_id as id, document from receiving.manifest where not processing_log @> %s order by id for update """, (processed_marker,))

    for record in manifest:
        with db.savepoint(f"manifest record {record.id}"):
            LOG.info(f"Processing record {record.id}")

            document = record.document

            # Convert sample barcode to full identifier, ensuring it's
            # known and from the correct identifier set.
            sample_barcode = document.pop("sample")
            sample_identifier = find_identifier(db, sample_barcode)

            if not sample_identifier:
                LOG.warning(f"Skipping sample with unknown sample barcode «{sample_barcode}»")
                mark_skipped(db, record.id)
                continue

            # RDT samples are validated against their own identifier sets.
            set_key = "rdt" if document.get("sample_type") == "rdt" else "samples"

            assert sample_identifier.set_name in expected_identifier_sets[set_key], \
                (f"Sample identifier found in set «{sample_identifier.set_name}», "
                 f"not {expected_identifier_sets[set_key]}")

            # Optionally, convert the collection barcode to full
            # identifier, ensuring it's known and from the correct
            # identifier set.
            collection_barcode = document.pop("collection", None)

            if collection_barcode:
                collection_identifier = find_identifier(db, collection_barcode)
            else:
                collection_identifier = None

            if collection_barcode and not collection_identifier:
                LOG.warning(f"Skipping sample with unknown collection barcode «{collection_barcode}»")
                mark_skipped(db, record.id)
                continue

            if collection_identifier:
                assert collection_identifier.set_name in expected_identifier_sets["collections"], \
                    f"Collection identifier found in set «{collection_identifier.set_name}», not {expected_identifier_sets['collections']}" # type: ignore

            # Sample collection date
            collection_date = document.get("date")

            # Upsert sample cooperatively with enrollments ETL routine
            #
            # The details document was intentionally modified by two pop()s
            # earlier to remove barcodes that were looked up.  The
            # rationale is that we want just one clear place in the
            # warehouse for each piece of information.
            collection_uuid = collection_identifier.uuid if collection_identifier else None

            sample, status = upsert_sample(
                db,
                identifier=sample_identifier.uuid,
                collection_identifier=collection_uuid,
                collection_date=collection_date,
                additional_details=document,
            )

            mark_loaded(db, record.id, status=status, sample_id=sample.id)

            LOG.info(f"Finished processing manifest record {record.id}")
def process_encounter_samples(db: DatabaseSession, encounter: Encounter, encounter_id: int,
    related_resources: Dict[str, List[DomainResource]]):
    """
    Given a dict of *related_resources*, finds Specimens linked to the given
    *encounter*. Linked Specimens are attached to the given *encounter_id* via
    newly upserted samples in ID3C.

    Specimens whose barcode matches no known identifier are skipped with a
    warning.  Raises :class:`Exception` when a specimen has no detectable
    barcode, and :class:`AssertionError` when the barcode's identifier belongs
    to neither an expected collection set nor an expected sample set.
    """
    def is_related_specimen(observation: Observation, encounter: Encounter) -> bool:
        """ True if *observation* references the given *encounter*. """
        return bool(observation.encounter) and observation.encounter.resolved(Encounter) == encounter

    def related_specimens(encounter: Encounter, resources: Dict[str, List[DomainResource]]) -> Optional[List[Specimen]]:
        """
        Given a dict of FHIR *resources*, returns a list of Specimens linked to a given *encounter*.
        """
        observations = resources.get('Observation')
        if not observations:
            return None

        related_observations = [o for o in observations if is_related_specimen(o, encounter)]
        specimens = [o.specimen.resolved(Specimen) for o in related_observations]

        if not specimens:
            LOG.warning("Encounter specimen not found.")
            return None

        return specimens

    specimens = related_specimens(encounter, related_resources)
    if not specimens:
        return

    for specimen in specimens:
        # NOTE(review): identifier() presumably returns None when no identifier
        # matches the system — confirm.  Only strip when there is a value so
        # the explanatory exception below is raised instead of an
        # AttributeError from None.strip().
        raw_barcode = identifier(specimen, f"{INTERNAL_SYSTEM}/sample")
        barcode = raw_barcode.strip() if raw_barcode else raw_barcode

        if not barcode:
            raise Exception("No barcode detectable. Either the barcode identification system is "
                f"not «{INTERNAL_SYSTEM}/sample», or the barcode value is empty, which "
                "violates the FHIR docs.")

        LOG.debug(f"Looking up collected specimen barcode «{barcode}»")
        specimen_identifier = find_identifier(db, barcode)

        if not specimen_identifier:
            LOG.warning(f"Skipping collected specimen with unknown barcode «{barcode}»")
            continue

        assert (specimen_identifier.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS or
            specimen_identifier.set_name in EXPECTED_SAMPLE_IDENTIFIER_SETS), \
            f"Specimen with unexpected «{specimen_identifier.set_name}» barcode «{barcode}»"

        # Exactly one of these is filled in, depending on which kind of set
        # the barcode's identifier belongs to.
        sample_identifier: Optional[str] = None
        collection_identifier: Optional[str] = None

        if specimen_identifier.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS:
            collection_identifier = specimen_identifier.uuid
        elif specimen_identifier.set_name in EXPECTED_SAMPLE_IDENTIFIER_SETS:
            sample_identifier = specimen_identifier.uuid
        else:
            assert False, "logic bug"

        sample_details = {}
        if specimen.note:
            # Only the first note's text is kept.
            sample_details['note'] = specimen.note[0].text

        additional_details = { **specimen.type.as_json(), **sample_details}

        # XXX TODO: Improve details object here; the current approach produces
        # an object like {"coding": [{…}]} which isn't very useful.
        upsert_sample(db,
            identifier = sample_identifier,
            collection_identifier = collection_identifier,
            encounter_id = encounter_id,
            additional_details = additional_details)
def process_diagnostic_report_bundle_entry(db: DatabaseSession, bundle: Bundle, entry: BundleEntry):
    """
    Given a DiagnosticReport resource *entry* from a given *bundle*, processes
    the relevant information into the database.

    Entries of any other resource type are ignored.  For every specimen
    reference with a usable barcode, the matching sample is looked up (or, for
    collection identifiers only, created) and its presence/absence tests are
    processed.

    Raises :class:`SampleNotFoundError` when a sample identifier has no
    existing sample, and :class:`AssertionError` when the barcode's identifier
    belongs to neither an expected collection set nor an expected sample set.
    """
    resource, resource_type = resource_and_resource_type(entry)

    if resource_type != 'DiagnosticReport':
        return

    LOG.debug(f"Processing DiagnosticReport Resource «{entry.fullUrl}».")

    for reference in resource.specimen:
        raw_barcode = None

        if not reference.identifier:
            specimen = reference.resolved(Specimen)
            raw_barcode = identifier(specimen, f"{INTERNAL_SYSTEM}/sample")
        elif matching_system(reference.identifier, INTERNAL_SYSTEM):
            raw_barcode = reference.identifier.value

        # NOTE(review): the looked-up value is presumably None when nothing
        # matches — confirm.  Strip only real values so a missing barcode is
        # skipped below rather than failing on None.strip().
        barcode = raw_barcode.strip() if raw_barcode else None

        if not barcode:
            continue

        LOG.debug(f"Looking up collected specimen barcode «{barcode}»")
        specimen_identifier = find_identifier(db, barcode)

        if not specimen_identifier:
            LOG.warning(f"Skipping collected specimen with unknown barcode «{barcode}»")
            continue

        # The incoming barcode is either for a collection identifier or for a
        # sample identifier; anything else is an error.  Membership is tested
        # directly rather than by catching an AssertionError, so the
        # classification does not depend on assertions being enabled.
        set_name = specimen_identifier.set_name
        is_collection_identifier = set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS

        if not is_collection_identifier:
            assert set_name in EXPECTED_SAMPLE_IDENTIFIER_SETS, \
                f"Specimen with unexpected «{set_name}» barcode «{barcode}»"

        sample = find_sample(db, specimen_identifier.uuid)

        if not is_collection_identifier and not sample:
            raise SampleNotFoundError(f"No sample with identifier «{specimen_identifier.uuid}» found.")

        # Sometimes the Ellume samples come in faster than the specimen manifest
        # is updated. In this case, create a new collection identifier that will
        # be filled in later.
        if not sample:
            LOG.debug(f"Creating sample with collection identifier «{specimen_identifier.uuid}»")
            sample = db.fetch_row(""" insert into warehouse.sample (collection_identifier) values (%s) returning sample_id as id, collection_identifier """, (str(specimen_identifier.uuid),))

            LOG.info(f"Created sample {sample.id} with collection identifier «{sample.collection_identifier}»")

        process_presence_absence_tests(db, resource, sample.id, barcode)
def etl_presence_absence(*, db: DatabaseSession):
    """
    Load unprocessed receiving.presence_absence records into the warehouse.

    Each record ("group") is handled inside its own savepoint.  For every
    current, non-failed sample in the group that has results and a known
    barcode, the sample's details are updated and one presence/absence result
    is upserted per gene target.  Groups in the old document format are
    marked as processed without being ingested.

    Raises :class:`KeyError` for a document that has no ``samples`` key and is
    not recognisably in the old format, and :class:`AssertionError` for an
    empty chip value or an identifier from an invalid set.
    """
    LOG.debug(f"Starting the presence_absence ETL routine, revision {REVISION}")

    # Fetch and iterate over presence-absence tests that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same presence-absence tests.
    LOG.debug("Fetching unprocessed presence-absence tests")

    presence_absence = db.cursor("presence_absence")
    presence_absence.itersize = 1
    presence_absence.execute(""" select presence_absence_id as id, document, received::date as received_date from receiving.presence_absence where not processing_log @> %s order by id for update """, (Json([{ "revision": REVISION }]),))

    for group in presence_absence:
        with db.savepoint(f"presence_absence group {group.id}"):
            LOG.info(f"Processing presence_absence group {group.id}")

            # Samplify will now send documents with a top level key
            # "samples". The new format also includes a "chip" key for each
            # sample which is then included in the unique identifier for
            # each presence/absence result
            #   -Jover, 14 Nov 2019
            try:
                received_samples = group.document["samples"]
            except KeyError as error:
                # Skip documents in the old format because they do not
                # include the "chip" key which is needed for the
                # unique identifier for each result.
                #   -Jover, 14 Nov 2019
                # Also skip old format to avoid ingesting wrong data from
                # plate swapped data! This will lead to 188 samples with the
                # wrong nwgc_id associated with them.
                #   -Jover, 06 Dec 2019
                if (group.document.get("store") is not None or
                    group.document.get("Update") is not None):

                    LOG.info("Skipping presence_absence record that is in old format")
                    mark_processed(db, group.id)
                    continue

                else:
                    raise error from None

            for received_sample in received_samples:
                received_sample_barcode = received_sample.get("investigatorId")
                if not received_sample_barcode:
                    LOG.info(f"Skipping sample «{received_sample['sampleId']}» without SFS barcode")
                    continue

                # Don't go any further if the sample is marked as Failed
                sample_failed = received_sample.get("sampleFailed")
                if sample_failed is True:
                    LOG.info(f"Skipping sample «{received_sample_barcode}» that has been failed")
                    continue

                # Don't go any further if there are no results to import.
                test_results = received_sample["targetResults"]

                if not test_results:
                    LOG.warning(f"Skipping sample «{received_sample_barcode}» without any results")
                    continue

                received_sample_id = str(received_sample["sampleId"])
                chip = received_sample.get("chip")
                extraction_date = received_sample.get("extractionDate")
                assay_name = received_sample.get("assayName")
                assay_date = received_sample.get("assayDate")
                # The assayType field will be removed after Samplify starts
                # sending us OpenArray results with target.clinicalStatus.
                #
                # kfay, 28 Dec 2020
                assay_type = received_sample.get("assayType")

                # Guard against empty chip values
                assert chip or "chip" not in received_sample, "Received bogus chip id"

                # Must be current results
                LOG.info(f"Processing sample «{received_sample_barcode}»")

                if not received_sample.get("isCurrentExpressionResult"):
                    LOG.warning(f"Skipping out-of-date results for sample «{received_sample_barcode}»")
                    continue

                # Barcode must match a known identifier
                db_identifier = find_identifier(db, received_sample_barcode)

                if not db_identifier:
                    LOG.warning(f"Skipping results for sample without a known identifier «{received_sample_barcode}»")
                    continue

                assert db_identifier.set_name in valid_identifiers, \
                    f"Identifier found in invalid set «{db_identifier.set_name}»"

                # Identifiers from a tiny-swab set are matched against the
                # sample's collection identifier instead of its identifier.
                tiny_swab = 'tiny-swab' in db_identifier.set_name

                received_sample_identifier = db_identifier.uuid

                # Track Samplify's internal ids for our samples, which is
                # unfortunately necessary for linking genomic data NWGC also
                # sends.
                if tiny_swab:
                    sample = update_sample(db,
                        collection_identifier = received_sample_identifier,
                        additional_details = sample_details(received_sample))
                else:
                    sample = update_sample(db,
                        identifier = received_sample_identifier,
                        additional_details = sample_details(received_sample))

                # Finally, process all results.
                for test_result in test_results:
                    test_result_target_id = test_result["geneTarget"]

                    # Adjacent literals keep this on two source lines without
                    # embedding source indentation in the logged message.
                    LOG.debug(
                        f"Processing target «{test_result_target_id}» for "
                        f"sample «{received_sample_barcode}»")

                    # Skip this result if it's actually a non-result
                    present = target_present(test_result)

                    if present is ...:
                        LOG.debug(f"No test result for «{test_result_target_id}», skipping")
                        continue

                    # Most of the time we expect to see existing targets so a
                    # select-first approach makes the most sense to avoid useless
                    # updates.
                    target = find_or_create_target(db,
                        identifier = test_result_target_id,
                        control = target_control(test_result["controlStatus"]))

                    # The unique identifier for each result.  If chip is
                    # applicable, then it's included to differentiate the same
                    # sample being run on multiple chips (uncommon, but it
                    # happens).
                    if chip:
                        result_identifier = f"NWGC/{received_sample_id}/{target.identifier}/{chip}"
                    else:
                        result_identifier = f"NWGC/{received_sample_id}/{target.identifier}"

                    # Most of the time we expect to see new samples and new
                    # presence_absence tests, so an insert-first approach makes more sense.
                    # Presence-absence tests we see more than once are presumed to be
                    # corrections.
                    upsert_presence_absence(db,
                        identifier = result_identifier,
                        sample_id = sample.id,
                        target_id = target.id,
                        present = present,
                        details = presence_absence_details(test_result,
                                                           group.received_date,
                                                           chip,
                                                           extraction_date,
                                                           assay_name,
                                                           assay_date,
                                                           assay_type))

            mark_processed(db, group.id)

            LOG.info(f"Finished processing presence_absence group {group.id}")
def kit_enrollments(*, db: DatabaseSession):
    """
    Link kits found in unprocessed receiving.enrollment records to their
    encounters.

    Each enrollment is handled inside its own savepoint.  Enrollments whose
    encounter is not linked to the «self-test» site are marked as processed
    without further work.  For the others, every sample code of an expected
    barcode type whose barcode resolves to an identifier in an expected
    «kits» set is upserted as a kit linked to the encounter.

    Raises :class:`EncounterNotFoundError` when the enrollment's encounter
    does not exist yet, so that a later run can retry from that record.
    """
    LOG.debug(
        f"Starting the kit enrollments ETL routine, revision {ENROLLMENTS_REVISION}"
    )

    expected_barcode_types = {"ScannedSelfSwab", "ManualSelfSwab"}

    LOG.debug("Fetching unprocessed enrollments")
    enrollments = db.cursor("enrollments")
    enrollments.execute(
        """ select enrollment_id as id, document from receiving.enrollment where not processing_log @> %s order by id for update """,
        (Json([{
            "etl": ETL_NAME,
            "revision": ENROLLMENTS_REVISION
        }]), ))

    for enrollment in enrollments:
        with db.savepoint(f"enrollment {enrollment.id}"):
            LOG.info(f"Processing enrollment {enrollment.id}")

            # Find encounter that should have been created
            # from this enrollment record through etl enrollments
            encounter = find_encounter(db, enrollment.document["id"])

            # Error out the kit etl process if no encounter found
            # The kit etl process can try again starting with this record
            # next time with the idea that the encounter will be
            # created by then.
            if not encounter:
                raise EncounterNotFoundError(
                    f"No encounter with identifier «{enrollment.document['id']}» found"
                )

            # Skip and mark the enrollment document as processed if the
            # encounter found is linked to a site that is not self-test
            if encounter.site != "self-test":
                LOG.debug(
                    f"Found encounter {encounter.id} «{encounter.identifier}» "
                    f"linked to site «{encounter.site}», not 'self-test'")
                mark_enrollment_processed(db, enrollment.id)
                continue

            for code in enrollment.document["sampleCodes"]:
                barcode = code.get("code")

                # Kit must have a barcode
                if not barcode:
                    LOG.warning(f"No barcode found in sampleCodes {code}")
                    continue

                # Barcode must be of expected barcode type
                if code["type"] not in expected_barcode_types:
                    LOG.debug(f"Skipping barcode with type {code['type']}")
                    continue

                # Convert kit barcode to full identifier
                kit_identifier = find_identifier(db, barcode)

                if not kit_identifier:
                    LOG.warning(
                        f"Skipping kit with unknown barcode «{barcode}»")
                    continue

                if kit_identifier.set_name not in expected_identifier_sets["kits"]:
                    LOG.warning(
                        f"Skipping kit with identifier found in "
                        f"set «{kit_identifier.set_name}» not {expected_identifier_sets['kits']}"
                    )
                    continue

                details = {"type": code["type"]}

                kit, status = upsert_kit_with_encounter(
                    db,
                    identifier=kit_identifier.uuid,
                    encounter_id=encounter.id,
                    additional_details=details)

                # An existing kit was updated, so its samples are updated too.
                if status == "updated":
                    update_kit_samples(db, kit)

            mark_enrollment_processed(db, enrollment.id)

            LOG.info(f"Finished processing enrollment {enrollment.id}")
def kit_manifests(*, db: DatabaseSession):
    """
    Link kits found in unprocessed receiving.manifest records to their
    samples.

    Each record is handled inside its own savepoint.  Records are marked as
    skipped when they carry no kit data, have an unknown or unexpected sample
    or kit barcode, or belong to a sample whose type is neither «utm» nor
    «rdt».  Otherwise the kit is upserted with the sample and the remaining
    manifest document as its details.

    Raises :class:`SampleNotFoundError` when the record's sample does not
    exist yet, so that a later run can retry from that record.
    """
    LOG.debug(
        f"Starting the kits manifests ETL routine, revision {MANIFEST_REVISION}"
    )

    LOG.debug("Fetching unprocessed manifest records")

    manifest = db.cursor("manifest")
    processed_marker = Json([{
        "etl": ETL_NAME,
        "revision": MANIFEST_REVISION
    }])
    manifest.execute(
        """ select manifest_id as id, document from receiving.manifest where not processing_log @> %s order by id for update """,
        (processed_marker, ))

    for record in manifest:
        with db.savepoint(f"manifest record {record.id}"):
            LOG.info(f"Processing record {record.id}")

            document = record.document

            # Mark record as skipped
            # if it does not contain a kit related sample
            if "kit" not in document:
                LOG.info(
                    f"Skipping manifest record {record.id} without kit data"
                )
                mark_skipped(db, record.id)
                continue

            sample_barcode = document.pop("sample")
            sample_identifier = find_identifier(db, sample_barcode)

            # Mark record as skipped
            # if it has an unknown sample barcode
            if not sample_identifier:
                LOG.warning(
                    f"Skipping manifest record with unknown sample barcode «{sample_barcode}»"
                )
                mark_skipped(db, record.id)
                continue

            # Mark record as skipped if the sample identifier set is unexpected
            if sample_identifier.set_name not in expected_identifier_sets["samples"]:
                LOG.warning(
                    f"Skipping manifest record with sample identifier found in "
                    f"set «{sample_identifier.set_name}», not {expected_identifier_sets['samples']}"
                )
                mark_skipped(db, record.id)
                continue

            # Find sample that should have been created from this
            # manifest record via etl manifest
            sample = find_sample(db, sample_identifier.uuid)

            # Error out the kit etl process if no sample found
            # The kit etl process can try again starting with this record
            # next time with the idea that the sample will be
            # created by then.
            if not sample:
                raise SampleNotFoundError(
                    f"No sample with «{sample_identifier.uuid}» found")

            # Mark record as skipped if the sample does not have a
            # sample type (utm or rdt)
            if sample.type not in {"utm", "rdt"}:
                LOG.info(f"Skipping manifest record {record.id} "
                         f"with unknown sample type {sample.type}")
                mark_skipped(db, record.id)
                continue

            kit_barcode = document.pop("kit")
            kit_identifier = find_identifier(db, kit_barcode)

            # Mark record as skipped if it has an unknown kit barcode
            if not kit_identifier:
                LOG.warning(
                    f"Skipping kit with unknown barcode «{kit_barcode}»")
                mark_skipped(db, record.id)
                continue

            # Mark record as skipped if kit identifier set is unexpected
            if kit_identifier.set_name not in expected_identifier_sets["kits"]:
                LOG.warning(
                    f"Skipping kit with identifier found in "
                    f"set «{kit_identifier.set_name}» not {expected_identifier_sets['kits']}"
                )
                mark_skipped(db, record.id)
                continue

            # Extra data not needed for the kit record is removed before
            # the manifest document is added to the kit details
            for unneeded_key in ("collection", "sample_type", "aliquot_date",
                                 "aliquots", "racks"):
                document.pop(unneeded_key, None)

            # Try to find identifier for the test-strip barcode for rdt samples
            if sample.type == "rdt":
                update_test_strip(db, document)

            kit, status = upsert_kit_with_sample(
                db,
                identifier=kit_identifier.uuid,
                sample=sample,
                additional_details=document)

            if status == "updated":
                update_sample(db, sample, kit.encounter_id)

            mark_loaded(db, record.id, status, kit.id)
def etl_manifest(*, db: DatabaseSession):
    """
    Load unprocessed receiving.manifest records into the warehouse as samples.

    Each record is handled inside its own savepoint.  A record may carry a
    sample barcode, a collection barcode, or both.  Records are marked as
    skipped when they have neither barcode, when a barcode matches no known
    identifier, or when the collection identifier is from an unexpected set.
    A sample identifier from an unexpected set fails an assertion instead.
    The remaining document is stored as the sample's details.
    """
    LOG.debug(f"Starting the manifest ETL routine, revision {REVISION}")

    # XXX TODO: Stop hardcoding valid identifier sets.  Instead, accept them as
    # an option or config (and validate option choices against what's actually
    # in the database).  We won't want to validate using click.option(),
    # because that would necessitate a database connection simply to run
    # bin/id3c at all.
    #   -trs, 13 May 2019
    expected_identifier_sets = {
        "samples": {"samples"},
        "collections": {
            "collections-environmental",
            "collections-fluathome.org",
            "collections-household-intervention",
            "collections-household-intervention-asymptomatic",
            "collections-household-observation",
            "collections-household-observation-asymptomatic",
            "collections-kiosks",
            "collections-kiosks-asymptomatic",
            "collections-seattleflu.org",
            "collections-swab&send",
            "collections-swab&send-asymptomatic",
            "collections-self-test",
            "collections-scan",
            "collections-scan-kiosks",
            "collections-haarvi",
            "samples-haarvi",
            "collections-validation",
            "collections-uw-home",
            "collections-uw-observed",
            "collections-household-general",
            "collections-childcare",
            "collections-school-testing-home",
            "collections-school-testing-observed",
            "collections-apple-respiratory",
            "collections-apple-respiratory-serial",
            "collections-adult-family-home-outbreak",
            "collections-workplace-outbreak",
        },
        "rdt": {"collections-fluathome.org"}
    }

    # Fetch and iterate over samples that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same samples.
    LOG.debug("Fetching unprocessed manifest records")

    manifest = db.cursor("manifest")
    manifest.execute(
        """ select manifest_id as id, document from receiving.manifest where not processing_log @> %s order by id for update """,
        (Json([{
            "etl": ETL_NAME,
            "revision": REVISION
        }]), ))

    for manifest_record in manifest:
        with db.savepoint(f"manifest record {manifest_record.id}"):
            LOG.info(f"Processing record {manifest_record.id}")

            document = manifest_record.document

            # When updating an existing row, update the identifiers
            # only if the record has both the 'sample' and
            # 'collection' keys.  This must be decided before the keys
            # are popped off the document below.
            should_update_identifiers = "sample" in document \
                and "collection" in document

            # Sample collection date
            # Don't pop this entry off the document. For backwards
            # compatibility reasons, keep it in the document so that 'date'
            # also gets written to the 'details' column in warehouse.sample.
            collected_date = document.get("date", None)

            # Attempt to find barcodes and their related identifiers
            sample_barcode = document.pop("sample", None)
            sample_identifier = find_identifier(
                db, sample_barcode) if sample_barcode else None

            collection_barcode = document.pop("collection", None)
            collection_identifier = find_identifier(
                db, collection_barcode) if collection_barcode else None

            # Skip a record if it has no associated barcodes
            if not sample_barcode and not collection_barcode:
                LOG.warning(
                    f"Skipping record «{manifest_record.id}» because it has neither a sample "
                    "barcode nor a collection barcode")
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if it has a sample barcode but the barcode doesn't match an identifier
            if sample_barcode and not sample_identifier:
                LOG.warning(
                    f"Skipping sample with unknown sample barcode «{sample_barcode}»"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if it has a collection barcode but the barcode doesn't match an identifier
            if collection_barcode and not collection_identifier:
                LOG.warning(
                    f"Skipping sample with unknown collection barcode «{collection_barcode}»"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if the collection identifier is from an unexpected set
            if collection_identifier and collection_identifier.set_name not in expected_identifier_sets["collections"]:
                # Adjacent literals keep the message free of the source
                # indentation a backslash continuation would embed.
                LOG.warning(
                    f"Skipping sample because collection identifier found in set «{collection_identifier.set_name}», not "
                    f"{expected_identifier_sets['collections']}")
                mark_skipped(db, manifest_record.id)
                continue

            # Validate the sample identifier and assert if a record fails
            if sample_identifier:
                # RDT samples are validated against their own identifier sets.
                set_key = "rdt" if document.get("sample_type") == "rdt" else "samples"

                assert sample_identifier.set_name in expected_identifier_sets[set_key], \
                    (f"Sample identifier found in set «{sample_identifier.set_name}», "
                     f"not {expected_identifier_sets[set_key]}")

            # Upsert sample cooperatively with enrollments ETL routine
            #
            # The details document was intentionally modified by two pop()s
            # earlier to remove barcodes that were looked up.
            # The rationale is that we want just one clear place in the
            # warehouse for each piece of information.
            sample, status = upsert_sample(
                db,
                update_identifiers=should_update_identifiers,
                identifier=sample_identifier.uuid
                if sample_identifier else None,
                collection_identifier=collection_identifier.uuid
                if collection_identifier else None,
                collection_date=collected_date,
                additional_details=document)

            mark_loaded(db,
                        manifest_record.id,
                        status=status,
                        sample_id=sample.id)

            LOG.info(
                f"Finished processing manifest record {manifest_record.id}")