Exemplo n.º 1
0
def create_layout_pdf(cfg: Config, entry_data: Dict[str, str]):
    """Main function to read a layout file and write the resulting plate layout map.

    Parameters
    ----------
    cfg: Config
        configuration information
    entry_data: Dict[str, str]
        dictionary containing the response that was submitted to Sample Plate Metada.
        The required keys are: the researcher, timestamp, sample plate barcode, and a
        link to the sample plate map in Google Drive. Optionally, the "local_run" key
        is used as a flag to indicate the script is being run from the command line
        rather than on AWS.
    """
    sample_barcode = entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE]
    output_filename = f"{sample_barcode}.pdf"

    if LOCAL_RUN in entry_data:
        output_path, drive_service = entry_data[LOCAL_RUN]
        output_file_object = (output_path / output_filename).open("wb")
    else:
        logger.debug("getting gdrive credentials")
        google_creds = gutils.get_secrets_manager_credentials()
        drive_service = drive.get_service(google_creds)

        processed_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.LAYOUT_PDF_FOLDER)

        output_file_object = drive.put_file(
            drive_service,
            processed_layout_folder_id,
            output_filename,
            binary=True,
        )

    try:
        plate_map_file = drive.get_layout_file_from_url(
            drive_service, entry_data[SampleMetadata.SAMPLE_PLATE_MAP])
    except KeyError:
        raise BadDriveURL(
            f"Bad URL in {SampleMetadata.SHEET_NAME} for {sample_barcode}")

    plate_map_type = accession.get_plate_map_type_from_name(
        plate_map_file.name)

    with plate_map_file.open() as fh:
        accession_data = accession.read_accession_data(plate_map_type, fh)

    logger.info(f"Writing layout map to {output_filename}")
    with output_file_object as output_fh:
        format_pdf(
            entry_data[SampleMetadata.SAMPLE_PLATE_BARCODE],
            accession_data,
            entry_data[SampleMetadata.RESEARCHER_NAME],
            format_time(cfg, entry_data[SampleMetadata.TIMESTAMP]),
            output_fh,
        )
Exemplo n.º 2
0
    def __init__(self, cfg, google_creds):
        self.cfg = cfg
        self.drive_service = drive.get_service(google_creds)
        self.verbose_data = [VERBOSE_SHEET_HEADER]
        self.clin_lab_data = [CLIN_LAB_SHEET_HEADER]
        self.supervisor_plate_queue_data = [SUPERVISOR_PLATE_QUEUE_HEADER]

        self.gc = gutils.get_gspread_connection(google_creds)
        logger.info(msg="Initializing processing resources")
        self.processing_resources = ProcessingResources(
            self.drive_service, self.cfg)
Exemplo n.º 3
0
def lambda_handler(event, context):
    try:
        cfg = Config()
        create_logger(cfg)
        creds = gutils.get_secrets_manager_credentials()
        os.chdir("/tmp")

        # Set up for Google Drive
        drive_service = drive.get_service(creds)
        sheets_service = sheets.get_service(creds)

        # Route based on the actions
        action = None
        if "body" in event:
            event_body = json.loads(event["body"])
            logger.info(msg=f"EVENT_BODY: {event_body}")
            action = event_body["action"]

        logger.info(msg=f"ACTION: {action}")
        if action == "external_sample_shipment":
            external_sample_shipment.handle_external_sample_shipment_request(
                cfg, drive_service, event_body)
        elif action == "sample_database":
            sample_database.external_sample_database(cfg, drive_service,
                                                     sheets_service,
                                                     event_body)
        elif action == "draw_96_plate_map":
            draw_96_plate_map.draw_96_plate_map(cfg, drive_service,
                                                sheets_service, event_body)
        elif action == "concat_96_384":
            concat_96_384.concat_96_384(cfg, drive_service, sheets_service,
                                        event_body)
        elif action == "bind_index_plate":
            bind_index_plate.handle_bind_index_plate_request(
                cfg, drive_service, sheets_service, event_body)
        elif action == "update_ripe_samples":
            update_ripe_samples.update_ripe_samples(cfg, drive_service,
                                                    sheets_service, event_body)
        elif action == "metadata_lookup":
            metadata_lookup.metadata_lookup(cfg, drive_service, sheets_service,
                                            event_body)

        return {
            "statusCode": 200,
            "headers": {
                "Content-Type": "text/html"
            },
            "body": "OK",
        }
    except Exception as err:
        slack.post(f"*Error in mNGS scripts:*\n{err}")
        raise
Exemplo n.º 4
0
def fetch_barcodes(args, cfg):
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)

    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue

        logger.info(msg=f"Found sample to fetch: {barcode}")

        # read in the run information and quant cq
        run_info = barcode_files.run_info
        logger.info(msg=f"    Downloading: {run_info.name}")
        with drive.get_file(drive_service, run_info.id, binary=False) as fh:
            with (args.output_dir / run_info.name).open("w") as out:
                out.write(fh.read())

        quant_cq = barcode_files.quant_cq
        logger.info(msg=f"    Downloading: {quant_cq.name}")
        with drive.get_file(drive_service, quant_cq.id, binary=False) as fh:
            with (args.output_dir / quant_cq.name).open("w") as out:
                out.write(fh.read())

        for quant_amp in barcode_files.quant_amp.values():
            logger.info(msg=f"    Downloading: {quant_amp.name}")
            with drive.get_file(drive_service, quant_amp.id,
                                binary=False) as fh:
                with (args.output_dir / quant_amp.name).open("w") as out:
                    out.write(fh.read())
Exemplo n.º 5
0
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("barcodes", nargs="+")
    parser.add_argument("--output-dir", type=Path, default=Path("."))

    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--secret-id", default="covid-19/google_creds")

    args = parser.parse_args()

    cfg = Config()
    create_logger(cfg, debug=args.debug)

    google_creds = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_creds)

    logger.debug("Downloading collective form")
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])
    sample_plate_metadata = collective_form[SampleMetadata.SHEET_NAME]

    for barcode in args.barcodes:
        try:
            metadata_row = clean_single_row(
                sample_plate_metadata, SampleMetadata.SAMPLE_PLATE_BARCODE,
                barcode)
        except MetadataNotFoundError:
            logger.error(f"0 results for {barcode}, skipping")
            continue
        except MultipleRowsError as ex:
            logger.error(f"{ex.match_count} results for {barcode}, skipping")
            continue
        metadata_row[SampleMetadata.TIMESTAMP] = str(
            metadata_row[SampleMetadata.TIMESTAMP])
        metadata_row[LOCAL_RUN] = (args.output_dir, drive_service)

        logger.debug(f"Making layout PDF for {barcode}")
        create_layout_pdf(cfg=cfg, entry_data=metadata_row)
Exemplo n.º 6
0
 def __init__(self, google_credentials, config):
     self.drive_service = drive.get_service(google_credentials)
     self.config = config
Exemplo n.º 7
0
def gdrive_service():
    """This fixture sets up a gdrive service object."""
    return get_service(credentials_for_tests())
Exemplo n.º 8
0
def processing(cfg: Config, google_credentials: service_account.Credentials):
    git_info = get_git_info()
    drive_service = drive.get_service(google_credentials)
    logger.info(msg=f"Starting processing loop with code version: {git_info}")

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)

    # markers folder
    markers_folder_id = drive.get_folder_id_of_path(drive_service,
                                                    cfg.PCR_MARKERS_FOLDER)

    # csv results folder
    csv_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CSV_RESULTS_FOLDER)

    # CB rad results folder
    cb_report_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.CHINA_BASIN_CSV_REPORTS_FOLDER)

    # final reports folder
    final_results_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.FINAL_REPORTS_FOLDER)

    # get the collection spreadsheet
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)
    marker_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                             markers_folder_id,
                                                             only_files=True)
    plate_layout_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.PLATE_LAYOUT_FOLDER)
    completed_barcodes = set(marker_folder_entry.name
                             for marker_folder_entry in marker_folder_contents)

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    # group log file entries by barcode
    logger.info(msg="Checking for samples to process")

    barcodes_to_process = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None or m[RunFiles.BARCODE] in completed_barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.critical(msg=message, extra={"notify_slack": True})
            continue

        try:
            logger.info(msg=f"Found sample to process, barcode: {barcode}")

            logger.info(msg=f"Getting metadata and data for: {barcode}")
            bravo_metadata = BravoMetadata.load_from_spreadsheet(
                barcode,
                collective_form,
            )
            if bravo_metadata.sop_protocol is None:
                message = f"Skipping sample plate: {barcode}, no protocol"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            protocol = get_protocol(bravo_metadata.sop_protocol)

            if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
                missing = map(
                    str,
                    set(protocol.mapping) - set(barcode_files.quant_amp))
                message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
                logger.critical(msg=message, extra={"notify_slack": True})
                continue

            # process well data and check controls, return results
            logger.info(
                msg=f"Processing well data and controls for: {barcode}")
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )

            control_wells = get_control_wells_from_type(
                controls_type=bravo_metadata.controls_type,
                accession_data=accession_data,
            )
            update_accession_data_with_controls(control_wells, accession_data,
                                                barcode)

            processing_results = process_barcode(
                cfg,
                barcode,
                barcode_files,
                bravo_metadata,
                protocol,
                control_wells,
                accession_data,
            )

            with drive.put_file(
                    drive_service,
                    csv_results_folder_id,
                    processing_results.results_filename,
            ) as fh:
                processing_results.write_results(fh)

            china_basin_result_file = drive.put_file(
                drive_service,
                cb_report_folder_id,
                processing_results.cb_report_filename,
            )
            with china_basin_result_file as fh:
                processing_results.write_cb_report(fh)

            # create pdf report
            logger.info(
                msg=f"Generating and uploading results PDF for: {barcode}")
            final_pdf = io.BytesIO()
            create_final_pdf(processing_results, final_pdf)
            pdf_results_file = drive.put_file(
                drive_service,
                final_results_folder_id,
                processing_results.final_pdf_filename,
            )
            with pdf_results_file as out_fh:
                out_fh.write(final_pdf.getvalue())

            logger.info(msg=f"Sending email report: {barcode}")
            mail.send_email(
                google_credentials,
                sender=cfg["EMAIL"].get("sender"),
                recipients=cfg["EMAIL"].get("recipients"),
                subject=_format_email_subject(
                    sample_barcode=bravo_metadata.sample_barcode,
                    qpcr_barcode=barcode,
                ),
                body=_format_email_body(
                    sample_barcode=bravo_metadata.sample_barcode,
                    results_file_id=china_basin_result_file.id,
                ),
                attachments={processing_results.final_pdf_filename: final_pdf},
            )

            message = (
                f"Processed sample plate: {bravo_metadata.sample_barcode}-{barcode}"
                f" using rev {git_info}")
            logger.critical(msg=message, extra={"notify_slack": True})
            # write a marker so we don't process this file again.
            processing_results.write_marker_file(drive_service,
                                                 markers_folder_id)

        except Exception as err:
            logger.critical(f"Error in [{cfg.aws_env}]: {err}",
                            extra={"notify_slack": True})
            logger.exception("Details:")
Exemplo n.º 9
0
def parse_qpcr_csv(args):
    cfg = Config()
    create_logger(cfg, debug=args.debug)

    logger.info(msg=f"Started local processing in: {args.qpcr_run_path}")

    if args.use_gdrive and not args.barcodes:
        raise ValueError(
            "You must specify barcodes to process from Google Drive")

    run_path = pathlib.Path(args.qpcr_run_path)

    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)

    drive_service = drive.get_service(google_credentials)
    collective_form = CollectiveForm(
        drive_service, cfg["DATA"]["collection_form_spreadsheet_id"])

    sample_metadata_form = collective_form[SampleMetadata.SHEET_NAME]
    rerun_form = collective_form[SampleRerun.SHEET_NAME]

    if args.use_gdrive:
        logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                     cfg.PCR_LOGS_FOLDER)
        logs_folder_contents = [
            drive_file for drive_file in drive.get_contents_by_folder_id(
                drive_service, logs_folder_id, only_files=True)
        ]

        plate_layout_folder_id = drive.get_folder_id_of_path(
            drive_service, cfg.PLATE_LAYOUT_FOLDER)
    else:
        logs_folder_contents = run_path.glob("*.csv")

    barcodes_to_process = defaultdict(RunFiles)
    for run_file in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(run_file.name)
        if m is None:
            continue
        elif args.barcodes and m[RunFiles.BARCODE] not in args.barcodes:
            continue
        else:
            barcodes_to_process[m[RunFiles.BARCODE]].add_file(m, run_file)

    for barcode, barcode_files in barcodes_to_process.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            message = f"Missing files for: {barcode}. Skipping for now"
            logger.info(msg=message)
            continue

        logger.info(msg=f"Found sample to process, barcode: {barcode}")

        logger.info(msg=f"Getting metadata and data for: {barcode}")
        bravo_metadata = BravoMetadata.load_from_spreadsheet(
            barcode, collective_form)
        if args.protocol is not None:
            # user specified the protocol
            protocol = get_protocol(args.protocol)
        else:
            protocol = get_protocol(bravo_metadata.sop_protocol)

        if not set(barcode_files.quant_amp).issuperset(protocol.mapping):
            missing = map(str,
                          set(protocol.mapping) - set(barcode_files.quant_amp))
            message = f"Missing quant amp files for {barcode}: {', '.join(missing)}"
            logger.critical(msg=message)
            continue

        if args.plate_map_file is not None:
            plate_map_type = accession.get_plate_map_type_from_name(
                args.plate_map_file.name)
            accession_data = accession.read_accession_data(
                plate_map_type, args.plate_map_file)
        elif args.use_gdrive:
            accession_data = accession.get_accession_data_with_rerun(
                drive_service,
                plate_layout_folder_id,
                sample_metadata_form,
                rerun_form,
                bravo_metadata.sample_barcode,
            )
        else:
            raise ValueError(
                "You must provide a plate map file or use Google Drive")

        control_wells = get_control_wells_from_type(
            controls_type=bravo_metadata.controls_type,
            accession_data=accession_data,
        )
        # check for valid accessions
        update_accession_data_with_controls(control_wells, accession_data,
                                            barcode)

        # process well data and check controls, return results
        logger.info(msg=f"Processing well data and controls for: {barcode}")

        processing_results = process_barcode(
            cfg,
            barcode,
            barcode_files,
            bravo_metadata,
            protocol,
            control_wells,
            accession_data,
        )

        with (run_path / processing_results.results_filename).open("w") as fh:
            processing_results.write_results(fh)

        with (run_path /
              processing_results.cb_report_filename).open("w") as fh:
            processing_results.write_cb_report(fh)

        # create pdf report
        logger.info(msg=f"Generating results PDF for: {barcode}")
        final_pdf_filename = run_path / processing_results.final_pdf_filename
        with open(final_pdf_filename, "wb") as output_file:
            create_final_pdf(processing_results, output_file)