def test_fetch_flowcell_pdc_retrieval_failed(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """fetch_flowcell propagates a failing PDC retrieval and logs the failure"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that is allowed to start a retrieval: the processing check passes
    # and the on-disk maximum has not been reached
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # WHEN the PDC retrieval raises a CalledProcessError
    retrieval_error = subprocess.CalledProcessError(1, "echo")
    mock_pdc.retrieve_flowcell.side_effect = retrieval_error
    with pytest.raises(subprocess.CalledProcessError):
        api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN the failed retrieval shows up in the log
    assert "retrieval failed" in caplog.text
def test_fetch_flowcell_no_flowcells_requested(
    mock_store,
    mock_pdc,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    mock_pop_flowcell,
    caplog,
):
    """fetch_flowcell returns None and logs when there is nothing to retrieve"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that checks whether a flowcell should be retrieved from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN popping the next flowcell yields nothing, while the processing check passes
    # and the on-disk maximum has not been reached
    mock_pop_flowcell.return_value = None
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # AND the caller does not specify a flowcell either
    retrieval_time = api.fetch_flowcell(None, dry_run=False)

    # THEN nothing is fetched
    assert retrieval_time is None

    # AND the log states that no flowcells have been requested
    assert "no flowcells requested" in caplog.text
def test_fetch_flowcell_retrieve_specified_flowcell(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """fetch_flowcell retrieves an explicitly specified flowcell from PDC"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that is allowed to start a retrieval and a specific flowcell to fetch
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # WHEN fetching that flowcell
    retrieval_time = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN the start of the PDC retrieval is logged
    assert "retrieving from PDC" in caplog.text

    # AND the flowcell ends up with the status "retrieved", which is logged as well
    status_message = f'Status for flowcell {mock_flowcell.name} set to "retrieved"'
    assert status_message in caplog.text
    assert mock_flowcell.status == "retrieved"

    # AND the change is committed to status-db
    assert mock_store.commit.called

    # AND a positive elapsed time is returned
    assert retrieval_time > 0
def test_fetch_flowcell_max_flowcells_ondisk(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    caplog,
):
    """fetch_flowcell does nothing when the maximum number of flowcells on disk is reached"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that checks whether a flowcell should be retrieved from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the processing check passes, but the on-disk check reports that the maximum
    # has been reached
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = True
    retrieval_time = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN nothing is fetched
    assert retrieval_time is None

    # AND the log states that the maximum number of flowcells on disk has been reached
    assert "maximum flowcells ondisk reached" in caplog.text
def fetch_flowcell(context: click.Context, dry_run: bool, flowcell: str):
    """Fetch the first flowcell in the requested queue from backup."""
    # NOTE: the docstring above is left untouched on purpose; if this function is
    # registered as a click command it doubles as the user-facing help text.
    #
    # context:  click context whose ``obj`` mapping holds the "database" entry and,
    #           optionally, "max_flowcells".
    # dry_run:  forwarded to the backup API; when true no status is written back here.
    # flowcell: name of a specific flowcell to fetch; falsy means "take the next one".
    status_api = Store(context.obj["database"])
    max_flowcells_on_disk = context.obj.get("max_flowcells", MAX_FLOWCELLS_ON_DISK)
    pdc_api = PdcApi()
    # NOTE(review): other call sites in this file also pass ``root_dir`` to BackupApi;
    # confirm whether it is required here as well.
    backup_api = BackupApi(
        status=status_api,
        pdc_api=pdc_api,
        max_flowcells_on_disk=max_flowcells_on_disk,
    )

    flowcell_obj = None
    if flowcell:
        flowcell_obj = status_api.flowcell(flowcell)
        if flowcell_obj is None:
            LOG.error(f"{flowcell}: not found in database")
            context.abort()

    retrieval_time = backup_api.fetch_flowcell(flowcell_obj=flowcell_obj, dry_run=dry_run)

    if retrieval_time:
        # The retrieval time is converted from seconds to hours for the log message.
        hours = retrieval_time / 60 / 60
        # Use fixed-point notation: a bare ".1" precision means one significant digit
        # in general format, which prints e.g. "1e+01h" for a 12.5 hour retrieval.
        LOG.info(f"Retrieval time: {hours:.1f}h")
        return

    if not flowcell:
        return

    # A specific flowcell was asked for but nothing was retrieved this time:
    # mark it as requested in the database.
    if not dry_run:
        LOG.info(f"{flowcell}: updating flowcell status to requested")
        flowcell_obj.status = "requested"
        status_api.commit()
def test_maximum_processing_queue_not_full(mock_store, mock_pdc):
    """check_processing returns True when no flowcells are being processed"""

    # GIVEN a backup API for which a flowcell is to be retrieved from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the store reports zero flowcells with status "processing"
    mock_store.flowcells(status="processing").count.return_value = 0
    check_result = api.check_processing(max_processing_flowcells=1)

    # THEN the check returns True
    assert check_result is True
def test_maximum_flowcells_ondisk_not_reached(mock_store, mock_pdc):
    """maximum_flowcells_ondisk returns False while below the configured maximum"""

    # GIVEN a backup API that allows at most 1250 flowcells on disk
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the store reports 1000 flowcells with status "ondisk"
    mock_store.flowcells(status="ondisk").count.return_value = 1000
    maximum_reached = api.maximum_flowcells_ondisk()

    # THEN the maximum is reported as not reached
    assert maximum_reached is False
def test_pop_flowcell_no_flowcell_requested(mock_store, mock_pdc):
    """pop_flowcell returns None when no flowcell has the status "requested" """

    # GIVEN a backup API that looks in status-db for flowcells to retrieve from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the store has no flowcell with status "requested"
    mock_store.flowcells(status="requested").first.return_value = None
    next_flowcell = api.pop_flowcell(dry_run=False)

    # THEN nothing is popped
    assert next_flowcell is None
def test_pop_flowcell_dry_run(mock_store, mock_pdc, mock_flowcell):
    """pop_flowcell returns a flowcell without committing to status-db on a dry run"""

    # GIVEN a backup API that looks in status-db for flowcells to retrieve from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN popping the next flowcell as a dry run
    next_flowcell = api.pop_flowcell(dry_run=True)

    # THEN a flowcell is returned
    assert next_flowcell is not None

    # AND nothing has been committed to status-db
    assert not mock_store.commit.called
def test_pop_flowcell_next_requested(mock_store, mock_pdc, mock_flowcell):
    """pop_flowcell returns the next flowcell that has the status "requested" """

    # GIVEN a backup API that looks in status-db for flowcells to retrieve from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the store has a flowcell with status "requested" and it is not a dry run
    mock_store.flowcells(status="requested").first.return_value = mock_flowcell
    next_flowcell = api.pop_flowcell(dry_run=False)

    # THEN a flowcell is returned
    assert next_flowcell is not None
def backup(context: CGConfig):
    """Backup utilities."""
    # Build the backup API from the configuration and register it under the
    # "backup_api" key of the context's meta APIs.
    pdc_api = PdcApi()
    backup_api = BackupApi(
        status=context.status_db,
        pdc_api=pdc_api,
        # Fall back to the module default when no maximum is configured.
        max_flowcells_on_disk=context.max_flowcells or MAX_FLOWCELLS_ON_DISK,
        root_dir=context.backup.root.dict(),
    )
    context.meta_apis["backup_api"] = backup_api
def fixture_backup_context(cg_context: CGConfig) -> CGConfig:
    """Return the given context with a BackupApi registered under meta_apis["backup_api"]."""
    backup_api = BackupApi(
        status=cg_context.status_db,
        pdc_api=PdcApi(),
        # Fall back to the module default when no maximum is configured.
        max_flowcells_on_disk=cg_context.max_flowcells or MAX_FLOWCELLS_ON_DISK,
        root_dir=cg_context.backup.root.dict(),
    )
    cg_context.meta_apis["backup_api"] = backup_api
    return cg_context
def test_fetch_flowcell_retrieve_next_flowcell(
    mock_store,
    mock_pdc,
    mock_check_processing,
    mock_maximum_flowcells_ondisk,
    mock_pop_flowcell,
    caplog,
):
    """fetch_flowcell retrieves the next requested flowcell when none is specified"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that checks whether a flowcell should be retrieved from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN popping the next flowcell yields one with the status "requested"
    requested_flowcell = mock_store.add_flowcell(status="requested")
    mock_pop_flowcell.return_value = requested_flowcell

    # AND the processing check passes and the on-disk maximum has not been reached
    api.check_processing.return_value = True
    api.maximum_flowcells_ondisk.return_value = False

    # AND the caller does not specify a flowcell
    retrieval_time = api.fetch_flowcell(None, dry_run=False)

    # THEN the start of the PDC retrieval is logged
    assert "retrieving from PDC" in caplog.text

    # AND the popped flowcell ends up with the status "retrieved", which is logged as well
    status_message = f'Status for flowcell {requested_flowcell.name} set to "retrieved"'
    assert status_message in caplog.text
    assert requested_flowcell.status == "retrieved"

    # AND the change is committed to status-db
    assert mock_store.commit.called

    # AND a positive elapsed time is returned
    assert retrieval_time > 0
def test_fetch_flowcell_processing_queue_full(
    mock_store,
    mock_pdc,
    mock_flowcell,
    mock_check_processing,
    caplog,
):
    """fetch_flowcell does nothing when the processing queue is full"""
    caplog.set_level(logging.INFO)

    # GIVEN a backup API that checks whether a flowcell should be retrieved from PDC
    api = BackupApi(
        mock_store,
        mock_pdc,
        max_flowcells_on_disk=1250,
        root_dir="/path/to/root_dir",
    )

    # WHEN the processing check fails
    api.check_processing.return_value = False
    retrieval_time = api.fetch_flowcell(mock_flowcell, dry_run=False)

    # THEN nothing is fetched
    assert retrieval_time is None

    # AND the log states that the processing queue is full
    assert "processing queue is full" in caplog.text