def test_count_imaging_table_columns(self):
    """
    Check that the imaging table column helpers are reachable from Python.

    A newly constructed column vector that has not been populated must
    report zero table columns.
    """
    empty_columns = py_interop_table.imaging_column_vector()
    column_total = py_interop_table.count_table_columns(empty_columns)
    self.assertEqual(column_total, 0)
def test_populate_imaging_table(self):
    """
    Test if imaging logic can be properly used.

    Reads an extraction metric set from an in-memory byte buffer, attaches
    run information (two reads and a flowcell layout), builds the imaging
    table columns and row offsets, populates the table into a float32
    array and checks the first cell.
    """
    # The buffer is written with signed byte values. Passing negative
    # Python integers straight to a uint8 array was deprecated in
    # NumPy 1.24 and raises OverflowError in NumPy 2.x, so every value is
    # masked into 0..255 first; this yields exactly the bytes the old
    # wrap-around conversion produced.
    signed_bytes = [
        2, 38,
        7, 0, 90, 4, 1, 0, -12, -56, 15, 64, -98, 35, 12, 64,
        0, 0, 0, 0, 0, 0, 0, 0, 46, 1, 17, 1, 0, 0, 0, 0,
        96, -41, -104, 36, 122, -86, -46, -120,
        7, 0, -66, 4, 1, 0, 96, -43, 14, 64, -63, 49, 13, 64,
        0, 0, 0, 0, 0, 0, 0, 0, 56, 1, 17, 1, 0, 0, 0, 0,
        112, 125, 77, 38, 122, -86, -46, -120,
        7, 0, 66, 8, 1, 0, 74, -68, 6, 64, -118, -7, 8, 64,
        0, 0, 0, 0, 0, 0, 0, 0, 93, 1, 46, 1, 0, 0, 0, 0,
        -47, -104, 2, 40, 122, -86, -46, -120,
    ]
    tmp = numpy.asarray([value & 0xFF for value in signed_bytes],
                        dtype=numpy.uint8)

    run = py_interop_run_metrics.run_metrics()
    py_interop_comm.read_interop_from_buffer(tmp, run.extraction_metric_set())
    self.assertEqual(run.extraction_metric_set().size(), 3)

    reads = py_interop_run.read_info_vector()
    reads.append(py_interop_run.read_info(1, 1, 26))
    reads.append(py_interop_run.read_info(2, 27, 76))
    run.run_info(py_interop_run.info(
        py_interop_run.flowcell_layout(2, 2, 2, 16),
        reads
    ))
    run.legacy_channel_update(py_interop_run.HiSeq)

    columns = py_interop_table.imaging_column_vector()
    py_interop_table.create_imaging_table_columns(run, columns)
    row_offsets = py_interop_table.map_id_offset()
    py_interop_table.count_table_rows(run, row_offsets)
    column_count = py_interop_table.count_table_columns(columns)

    # The table is filled in place through the flattened float32 array.
    data = numpy.zeros((len(row_offsets), column_count), dtype=numpy.float32)
    py_interop_table.populate_imaging_table_data(run, columns, row_offsets, data.ravel())
    self.assertEqual(data[0, 0], 7)
def get_percent_occupied_by_lane(run_folder_path):
    """
    Build a DataFrame of Lane, Tile, Cycle and % Occupied for a run folder.

    The run is only loaded when the lower-cased folder path contains one of
    the NOVASEQ entries (compared case-insensitively) and does not contain
    'myrun'. Otherwise an empty DataFrame is returned.

    Parameters:
        run_folder_path: path of the run folder, as a string.

    Returns:
        A pandas DataFrame with the columns "Lane", "Tile", "Cycle" and
        "% Occupied", indexed by (Lane, Tile, Cycle) tuples taken from the
        imaging table; an empty DataFrame when the folder is not eligible.

    Raises:
        ValueError: if one of the four expected headers is absent from the
            header list returned by get_headers.
    """
    folder_lower = run_folder_path.lower()

    # Guard clauses: return a real (empty) DataFrame instance rather than
    # the DataFrame class when the folder is not eligible.
    if "myrun" in folder_lower:
        return pd.DataFrame()
    if not any(item.lower() in folder_lower for item in NOVASEQ):
        return pd.DataFrame()

    # The loaded data does not depend on which NOVASEQ entry matched, so
    # the run is read once instead of once per matching entry.
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    valid_to_load[py_interop_run.ExtendedTile] = 1
    valid_to_load[py_interop_run.Tile] = 1
    valid_to_load[py_interop_run.Extraction] = 1

    run_metrics = py_interop_run_metrics.run_metrics()
    run_metrics.read(run_folder_path, valid_to_load)

    columns = py_interop_table.imaging_column_vector()
    py_interop_table.create_imaging_table_columns(run_metrics, columns)
    headers = get_headers(columns, run_folder_path)

    column_count = py_interop_table.count_table_columns(columns)
    row_offsets = py_interop_table.map_id_offset()
    py_interop_table.count_table_rows(run_metrics, row_offsets)

    # The table is filled in place through the flattened float32 array.
    data = np.zeros((row_offsets.size(), column_count), dtype=np.float32)
    py_interop_table.populate_imaging_table_data(
        run_metrics, columns, row_offsets, data.ravel())

    header_subset = ["Lane", "Tile", "Cycle", "% Occupied"]

    # The first three headers (Lane, Tile, Cycle) form the row index.
    key_columns = np.asarray(
        [headers.index(header) for header in header_subset[:3]])
    row_keys = [tuple(row) for row in data[:, key_columns]]

    series_by_header = {
        header: pd.Series(list(data[:, headers.index(header)]), index=row_keys)
        for header in header_subset
    }
    return pd.DataFrame.from_dict(series_by_header)
def plot_occupancy(run_folder: str, output_jpg_prefix="occupancy"):
    """
    Save scatter plots of % Occupied against % Pass Filter for a run folder.

    On the NovaSeq platform these two metrics can be plotted against each
    other to judge whether a run was underloaded, optimally loaded, or
    overloaded. More information:
    https://support.illumina.com/bulletins/2020/03/plotting---occupied-by---pass-filter-to-optimize-loading-concent.html

    One JPG is written per hue ("Tile", "Lane", "Cycle"), named
    "<output_jpg_prefix>_<hue in lower case>.jpg". When the imaging table
    has no rows or no "% Occupied" column, a message is printed and nothing
    is plotted.
    """
    # Flag only the metric sets that are read for this plot
    load_flags = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    for metric in (
        py_interop_run.ExtendedTile,
        py_interop_run.Tile,
        py_interop_run.Extraction,
    ):
        load_flags[metric] = 1

    # Load the metrics from the run folder
    metrics = py_interop_run_metrics.run_metrics()
    metrics.read(run_folder, load_flags)

    # Describe the imaging table columns
    table_columns = py_interop_table.imaging_column_vector()
    py_interop_table.create_imaging_table_columns(metrics, table_columns)

    # One header per column, or one per sub-column when a column has children
    headers = []
    for idx in range(table_columns.size()):
        col = table_columns[idx]
        if not col.has_children():
            headers.append(col.name())
            continue
        for subname in col.subcolumns():
            headers.append(f"{col.name()} ({subname})")

    n_columns = py_interop_table.count_table_columns(table_columns)
    offsets = py_interop_table.map_id_offset()
    py_interop_table.count_table_rows(metrics, offsets)

    # The table is filled in place through the flattened float32 array
    table = np.zeros((offsets.size(), n_columns), dtype=np.float32)
    py_interop_table.populate_imaging_table_data(
        metrics, table_columns, offsets, table.ravel()
    )

    df = pd.DataFrame(table, columns=headers)

    x_metric = "% Occupied"
    y_metric = "% Pass Filter"

    # Nothing to plot without rows or without the % Occupied column
    # (% Occupied only available on NovaSeq)
    has_rows = len(df.index) > 0
    if not has_rows or x_metric not in df.columns:
        print("Occupancy plot skipped, no data available")
        return

    # One figure per colouring variable
    for hue in ("Tile", "Lane", "Cycle"):
        sns.scatterplot(
            data=df,
            x=x_metric,
            y=y_metric,
            hue=hue,
            alpha=0.5,
            linewidth=0,
        )
        plt.xlim([0, 100])
        plt.ylim([0, 100])
        plt.legend(title=hue, bbox_to_anchor=[1.2, 0.9])
        plt.tight_layout()
        out_path = f"{output_jpg_prefix}_{hue.lower()}.jpg"
        plt.savefig(out_path, dpi=600)
        plt.close()