def retrieve_file(self, filename: str) -> ChecksummedFileInfo:
    """Download *filename* from this instance's Drive folder.

    Reuses a per-thread HTTP client cached on the thread-local ``tls``
    object (creating one from ``self.drive_service`` on first use) so the
    method is safe to call from multiple worker threads.

    Fixes vs. original:
      * the signature was missing ``self`` even though the body reads
        ``self.drive_service`` / ``self.folder_id``;
      * an unexpected payload type left ``data_fh`` unbound (NameError) —
        now raises an explicit ``TypeError``.

    :param filename: name of the file to look up in ``self.folder_id``;
        when several match, the most recently modified one is used.
    :return: a ``ChecksummedFileInfo`` wrapping an in-memory file object
        (``StringIO`` for text payloads, ``BytesIO`` for binary) and the
        Drive-reported md5 checksum.
    """
    # One HTTP client per thread: googleapiclient http objects are not
    # thread-safe, so cache on thread-local storage.
    http_client = getattr(tls, "http", None)
    if http_client is None:
        http_client = new_http_client_from_service(self.drive_service)
        setattr(tls, "http", http_client)

    drive_obj = drive.find_file_by_name(
        self.drive_service,
        self.folder_id,
        filename,
        drive.FindMode.MOST_RECENTLY_MODIFIED,
        http=http_client,
    )
    with drive.get_file(self.drive_service, drive_obj.id, http=http_client) as fh:
        data = fh.read()

    if isinstance(data, str):
        data_fh = StringIO(data)
    elif isinstance(data, bytes):
        data_fh = BytesIO(data)
    else:
        # Previously this fell through and crashed with NameError below.
        raise TypeError(f"unexpected payload type {type(data).__name__} for {filename}")
    data_fh.name = filename  # needed for readers that expect a name attr
    return ChecksummedFileInfo(filename, data_fh, drive_obj.md5Checksum)
def fetch_barcodes(args, cfg):
    """Download the qPCR log files for the barcodes requested in *args*.

    Scans ``cfg.PCR_LOGS_FOLDER`` on Drive, groups files by barcode via
    ``RunFiles``, and writes the run-info, quant-cq, and quant-amp files
    for each requested barcode into ``args.output_dir``.

    :param args: must provide ``secret_id``, ``barcodes`` (collection of
        barcode strings), and ``output_dir`` (a ``pathlib.Path``).
    :param cfg: must provide ``PCR_LOGS_FOLDER``.

    Barcodes with an incomplete file set are logged and skipped.
    """
    google_credentials = gutils.get_secrets_manager_credentials(args.secret_id)
    drive_service = drive.get_service(google_credentials)

    # qpcr logs folder
    logs_folder_id = drive.get_folder_id_of_path(drive_service, cfg.PCR_LOGS_FOLDER)
    logs_folder_contents = drive.get_contents_by_folder_id(
        drive_service, logs_folder_id, only_files=True
    )

    # Group the folder's files by barcode, keeping only requested barcodes.
    barcodes_to_fetch = defaultdict(RunFiles)
    for entry in logs_folder_contents:
        m = RunFiles.get_qpcr_file_type(entry.name)
        if m is None:
            continue
        elif m[RunFiles.BARCODE] in args.barcodes:
            barcodes_to_fetch[m[RunFiles.BARCODE]].add_file(m, entry)

    def _download(entry):
        # Helper: stream one Drive text file into output_dir under its own name.
        # (Was copy-pasted three times in the original.)
        logger.info(msg=f" Downloading: {entry.name}")
        with drive.get_file(drive_service, entry.id, binary=False) as fh:
            with (args.output_dir / entry.name).open("w") as out:
                out.write(fh.read())

    for barcode, barcode_files in barcodes_to_fetch.items():
        # all files must be present, at least one quant_amp file
        if not barcode_files.all_files:
            logger.warning(msg=f"Missing files for {barcode}!")
            continue
        logger.info(msg=f"Found sample to fetch: {barcode}")

        # read in the run information and quant cq
        _download(barcode_files.run_info)
        _download(barcode_files.quant_cq)
        for quant_amp in barcode_files.quant_amp.values():
            _download(quant_amp)
def compile_accession_info_from_file(self, accession_file, local_processing):
    """
    Generate accession tracking information for all accessions in the
    given sample barcode and append information.

    Reads the plate-map file from Drive, resolves its type/timestamp/
    barcode from the filename, then appends rows to
    ``self.supervisor_plate_queue_data``, ``self.verbose_data``, and
    ``self.clin_lab_data``.  On unreadable data or a missing barcode the
    problem is logged and the file is skipped (early return).

    :param accession_file: Drive file object with ``name`` and ``id``.
    :param local_processing: when False and the tracker reports the
        barcode as finished, a marker file is written to Drive.
    """
    name = accession_file.name
    # .xlsx plate maps must be fetched in binary mode; everything else is text.
    binary_mode = name.endswith(".xlsx")

    with drive.get_file(self.drive_service, accession_file.id, binary=binary_mode) as fh:
        plate_map_type = get_plate_map_type_from_name(name)
        try:
            well_to_accession = read_accession_data(plate_map_type, fh)
        except Exception as e:
            logger.error(
                f"Could not extract accessions info from filename {name}, skipping, exception: {e}"
            )
            return

    timestamp = extract_timestamp_from_plate_map_filename(name, plate_map_type)
    sample_barcode = extract_barcode_from_plate_map_filename(name, plate_map_type)
    if not sample_barcode:
        logger.error(
            f"Could not extract sample barcode from filename {name}, skipping"
        )
        return

    tracker = SampleTracker(
        timestamp=timestamp,
        sample_barcode=sample_barcode,
        drive_service=self.drive_service,
        processing_resources=self.processing_resources,
    )
    self.supervisor_plate_queue_data.append(
        tracker.format_row_entry_for_supervisor_plate_queue())

    if plate_map_type != PlateMapType.LEGACY:
        for well, accession in well_to_accession.items():
            # Control/empty wells carry no patient accession.
            if accession in ("CONTROL", "EMPTY"):
                continue
            self.verbose_data.extend(
                tracker.format_verbose_row_entries(well, accession))
            if re.match(VALID_ACCESSION, accession.rstrip()):
                # only add valid accessions to the clin lab sheet
                self.clin_lab_data.extend(
                    tracker.format_row_entries_clin_lab(well, accession))

    if not local_processing and tracker.finished_processing:
        self.write_marker_file(sample_barcode=sample_barcode)
def test_put_overwrite_multiple(
    gdrive_service: DriveService,
    gdrive_folder: DriveObject,
    filename="test_put_overwrite_multiple.txt",
):
    """Test the case where we are overwriting and there are multiple files we could possibly overwrite. It should overwrite the newest file."""
    # Upload the initial file.
    request = put_file(gdrive_service, gdrive_folder.id, filename)
    with request as out:
        out.write("first")
    original_id = request.id

    # Upload a second file under the same name without overwriting,
    # so the folder now holds two files named `filename`.
    request = put_file(gdrive_service, gdrive_folder.id, filename, overwrite_if_present=False)
    with request as out:
        out.write("second")
    newest_id = request.id

    # An overwriting put must replace the newest of the duplicates.
    request = put_file(gdrive_service, gdrive_folder.id, filename, overwrite_if_present=True)
    with request as out:
        out.write("third")
    assert request.id == newest_id

    # Still exactly two files with this name in the folder.
    folder_entries = get_contents_by_folder_id(
        gdrive_service, gdrive_folder.id, only_files=True
    )
    matching = [entry for entry in folder_entries if entry.name == filename]
    assert len(matching) == 2

    # The older file is untouched; the newer one now holds "third".
    with get_file(gdrive_service, original_id, True) as fh:
        assert fh.read() == b"first"
    with get_file(gdrive_service, newest_id, False) as fh:
        assert fh.read() == "third"
def get_accession_locations(drive_service, cfg) -> Dict[str, Tuple[str, str]]:
    """return a mapping between accession ID's and their origin location

    Reads every CSV in ``cfg.ACCESSION_LOCATIONS_FOLDER`` on Drive.  Rows
    are ``accession,location`` or ``accession,location,submitter_id``;
    the header row (first cell == "Accession") is skipped.  Later files/
    rows win when an accession repeats.

    :return: mapping of accession -> (location, submitter_id); the
        submitter_id is "" for two-column rows.
    """
    accession_locations = {}
    accession_location_folder_id = drive.get_folder_id_of_path(
        drive_service, cfg.ACCESSION_LOCATIONS_FOLDER)
    accession_location_files = drive.get_contents_by_folder_id(
        drive_service, accession_location_folder_id, only_files=True)
    for accession_location_file in accession_location_files:
        with drive.get_file(drive_service, accession_location_file.id) as fh:
            accession_location_reader = csv.reader(fh, delimiter=",")
            for row in accession_location_reader:
                if not row:
                    # blank lines yield [] from csv.reader; row[0] below
                    # would raise IndexError
                    continue
                if row[0] == "Accession":
                    # header row
                    continue
                submitter_id = ""
                if len(row) == 3:
                    accession, location, submitter_id = row
                else:
                    accession, location = row
                accession_locations[accession] = location, submitter_id
    return accession_locations