Example #1
        def retrieve_file(filename: str) -> ChecksummedFileInfo:
            # httplib2 clients are not thread-safe, so each worker thread lazily
            # creates and reuses its own client via thread-local storage.
            http_client = getattr(tls, "http", None)
            if http_client is None:
                http_client = new_http_client_from_service(self.drive_service)
                setattr(tls, "http", http_client)

            drive_obj = drive.find_file_by_name(
                self.drive_service,
                self.folder_id,
                filename,
                drive.FindMode.MOST_RECENTLY_MODIFIED,
                http=http_client,
            )

            with drive.get_file(self.drive_service,
                                drive_obj.id,
                                http=http_client) as fh:
                data = fh.read()
                if isinstance(data, str):
                    data_fh = StringIO(data)
                elif isinstance(data, bytes):
                    data_fh = BytesIO(data)
                else:
                    raise TypeError(
                        f"Unexpected payload type from drive.get_file: {type(data)!r}")

                data_fh.name = filename  # needed for readers that expect a name attr
                return ChecksummedFileInfo(filename, data_fh,
                                           drive_obj.md5Checksum)
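This example relies on a ChecksummedFileInfo container and a module-level thread-local object (tls) that are not shown in the snippet. A minimal sketch of what they might look like, assuming a simple NamedTuple; the field names are guesses and only cover the attributes the example actually uses:

import threading
from typing import IO, NamedTuple


class ChecksummedFileInfo(NamedTuple):
    # Assumed shape: the example passes exactly these three values.
    filename: str
    data: IO           # StringIO or BytesIO with a .name attribute set
    md5_checksum: str


# Per-thread storage so each worker keeps its own HTTP client
# (httplib2 clients are not safe to share across threads).
tls = threading.local()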
Example #2
def fetch_barcodes(args, cfg):
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service,
                                                 cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(drive_service,
                                                           logs_folder_id,
                                                           only_files=True)

    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue

        logger.info(msg=f"Found sample to fetch: {barcode}")

        # read in the run information and quant cq
        run_info = barcode_files.run_info
        logger.info(msg=f"    Downloading: {run_info.name}")
        with drive.get_file(drive_service, run_info.id, binary=False) as fh:
            with (args.output_dir / run_info.name).open("w") as out:
                out.write(fh.read())

        quant_cq = barcode_files.quant_cq
        logger.info(msg=f"    Downloading: {quant_cq.name}")
        with drive.get_file(drive_service, quant_cq.id, binary=False) as fh:
            with (args.output_dir / quant_cq.name).open("w") as out:
                out.write(fh.read())

        for quant_amp in barcode_files.quant_amp.values():
            logger.info(msg=f"    Downloading: {quant_amp.name}")
            with drive.get_file(drive_service, quant_amp.id,
                                binary=False) as fh:
                with (args.output_dir / quant_amp.name).open("w") as out:
                    out.write(fh.read())
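fetch_barcodes only reads three attributes from args (secret_id, barcodes, output_dir) and one from cfg (PCR_LOGS_FOLDER). A minimal wiring sketch for calling it; the flag names, config class, and folder path below are illustrative assumptions, not taken from the project:

import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description="Fetch qPCR log files by barcode")
parser.add_argument("--secret-id", dest="secret_id", required=True)
parser.add_argument("--barcodes", nargs="+", default=[])
parser.add_argument("--output-dir", dest="output_dir", type=Path, default=Path("."))


class Config:
    # Placeholder Drive path; the real value lives in the project configuration.
    PCR_LOGS_FOLDER = "path/to/qpcr_logs"


if __name__ == "__main__":
    # Requires valid Google credentials reachable through the secrets manager.
    fetch_barcodes(parser.parse_args(), Config())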
Example #3
    def compile_accession_info_from_file(self, accession_file,
                                         local_processing):
        """
        Generate accession tracking information for all accessions in the given sample barcode and
        append information.
        """
        name = accession_file.name
        # .xlsx plate maps must be downloaded in binary mode
        binary_mode = name.endswith(".xlsx")
        with drive.get_file(self.drive_service,
                            accession_file.id,
                            binary=binary_mode) as fh:
            plate_map_type = get_plate_map_type_from_name(name)

            try:
                well_to_accession = read_accession_data(plate_map_type, fh)
            except Exception as e:
                logger.error(
                    f"Could not extract accession info from filename {name}, skipping; exception: {e}"
                )
                return

            timestamp = extract_timestamp_from_plate_map_filename(
                name, plate_map_type)
            sample_barcode = extract_barcode_from_plate_map_filename(
                name, plate_map_type)
            if not sample_barcode:
                logger.error(
                    f"Could not extract sample barcode from filename {name}, skipping"
                )
                return
            tracker = SampleTracker(
                timestamp=timestamp,
                sample_barcode=sample_barcode,
                drive_service=self.drive_service,
                processing_resources=self.processing_resources,
            )
            self.supervisor_plate_queue_data.append(
                tracker.format_row_entry_for_supervisor_plate_queue())
            if plate_map_type != PlateMapType.LEGACY:
                for well, accession in well_to_accession.items():
                    if accession != "CONTROL" and accession != "EMPTY":
                        for entry in tracker.format_verbose_row_entries(
                                well, accession):
                            self.verbose_data.append(entry)
                        if re.match(VALID_ACCESSION, accession.rstrip()):
                            # only add valid accessions to the clin lab sheet
                            for entry in tracker.format_row_entries_clin_lab(
                                    well, accession):
                                self.clin_lab_data.append(entry)
        if not local_processing and tracker.finished_processing:
            self.write_marker_file(sample_barcode=sample_barcode)
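Only accessions that are neither CONTROL nor EMPTY and that match VALID_ACCESSION (after stripping trailing whitespace) reach the clin lab sheet; everything else still gets verbose tracking entries. A standalone illustration of that filter, where the regex is a made-up placeholder rather than the project's actual pattern:

import re

# Hypothetical pattern for illustration; the real VALID_ACCESSION is defined elsewhere.
VALID_ACCESSION = r"^[A-Z]\d{8}$"

for accession in ["A12345678", "A12345678 ", "CONTROL", "EMPTY", "not-an-id"]:
    if accession in ("CONTROL", "EMPTY"):
        continue
    if re.match(VALID_ACCESSION, accession.rstrip()):
        print(f"{accession!r}: added to the clin lab sheet")
    else:
        print(f"{accession!r}: tracked in the verbose data only")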
Example #4
def test_put_overwrite_multiple(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_overwrite_multiple.txt",
):
    """Test the case where we are overwriting and there are multiple files we
    could possibly overwrite.  It should overwrite the newest file."""
    put_request = put_file(gdrive_service, gdrive_folder.id, filename)
    with put_request as fh:
        fh.write("first")
    first_id = put_request.id

    put_request = put_file(gdrive_service,
                           gdrive_folder.id,
                           filename,
                           overwrite_if_present=False)
    with put_request as fh:
        fh.write("second")
    second_id = put_request.id

    put_request = put_file(gdrive_service,
                           gdrive_folder.id,
                           filename,
                           overwrite_if_present=True)
    with put_request as fh:
        fh.write("third")
    assert put_request.id == second_id

    listing = get_contents_by_folder_id(gdrive_service,
                                        gdrive_folder.id,
                                        only_files=True)
    matching_listings = [entry for entry in listing if entry.name == filename]
    assert len(matching_listings) == 2

    with get_file(gdrive_service, first_id, True) as fh:  # binary read
        assert fh.read() == b"first"
    with get_file(gdrive_service, second_id, False) as fh:  # text read
        assert fh.read() == "third"
Example #5
def get_accession_locations(drive_service, cfg) -> Dict[str, Tuple[str, str]]:
    """Return a mapping from accession ID to its (origin location, submitter ID)."""
    accession_locations = {}
    accession_location_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSION_LOCATIONS_FOLDER)
    accession_location_files = drive.get_contents_by_folder_id(
        drive_service, accession_location_folder_id, only_files=True)
    for accession_location_file in accession_location_files:
        with drive.get_file(drive_service, accession_location_file.id) as fh:
            accession_location_reader = csv.reader(fh, delimiter=",")
            for row in accession_location_reader:
                if row[0] == "Accession":
                    # header row
                    continue
                submitter_id = ""
                if len(row) == 3:
                    accession, location, submitter_id = row
                else:
                    accession, location = row
                accession_locations[accession] = location, submitter_id
    return accession_locations
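The reader above only treats a row as a header when its first cell is the literal string "Accession", and then accepts either two- or three-column rows. A quick offline check of the same parsing logic; the column headers and sample values are illustrative:

import csv
from io import StringIO

sample_csv = (
    "Accession,Location,Submitter\n"  # only the "Accession" cell is significant
    "A0001,Site 12,LAB-7\n"           # three columns: submitter ID present
    "A0002,Site 3\n"                  # two columns: submitter ID defaults to ""
)

locations = {}
for row in csv.reader(StringIO(sample_csv), delimiter=","):
    if row[0] == "Accession":
        continue
    submitter_id = ""
    if len(row) == 3:
        accession, location, submitter_id = row
    else:
        accession, location = row
    locations[accession] = (location, submitter_id)

assert locations == {"A0001": ("Site 12", "LAB-7"), "A0002": ("Site 3", "")}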