def parse_codebook(codebook_csv: str) -> Codebook:
    """Parse a codebook CSV file provided by SeqFISH developers.

    Parameters
    ----------
    codebook_csv : str
        Location of the codebook CSV, passed straight to ``pd.read_csv``.
        The first column is used as the row index and supplies the gene
        names. Every remaining column is treated, in file order, as one
        imaging round; the column labels themselves are not inspected.
        A cell holds the 1-based channel that is "on" for that gene in that
        round, and 0 marks a round in which no channel is on.

    Returns
    -------
    Codebook :
        Result of ``Codebook.from_numpy`` applied to a one-hot array of
        shape (genes, rounds, channels).
    """
    table: pd.DataFrame = pd.read_csv(codebook_csv, index_col=0)
    genes = table.index.values
    data_raw = table.values
    rounds = table.shape[1]
    # Channels are numbered from 1, so the largest entry is the channel count.
    channels = data_raw.max()

    # One-hot encode data_raw into an array laid out as
    # genes x rounds x channels.
    data = np.zeros((len(data_raw), rounds, channels))
    for (gene_idx, round_idx), channel_id in np.ndenumerate(data_raw):
        if channel_id != 0:
            data[gene_idx, round_idx, channel_id - 1] = 1

    return Codebook.from_numpy(genes, rounds, channels, data)
def parse_codebook(codebook_csv: str) -> Codebook:
    """Build a Codebook from a CSV matrix of per-round channel numbers.

    Parameters
    ----------
    codebook_csv : str
        Location of the CSV, handed to ``pd.read_csv`` with the first column
        as the index (the gene names). Each other column is one round, taken
        in file order. Cell values are 1-based channel numbers; a 0 leaves
        that round empty for the gene.

    Returns
    -------
    Codebook :
        ``Codebook.from_numpy`` output for the one-hot encoded matrix, whose
        axes are (genes, rounds, channels).
    """
    frame: pd.DataFrame = pd.read_csv(codebook_csv, index_col=0)
    data_raw = frame.values
    genes = frame.index.values
    rounds = frame.shape[1]
    # The highest channel number present determines the channel axis length.
    channels = data_raw.max()

    # Expand data_raw into a one-hot array: genes x rounds x channels.
    data = np.zeros((len(data_raw), rounds, channels))
    for gene_idx, gene_row in enumerate(data_raw):
        for round_idx, channel_id in enumerate(gene_row):
            if channel_id == 0:
                # 0 means "no channel on in this round"; leave the row zeroed.
                continue
            data[gene_idx][round_idx][channel_id - 1] = 1

    return Codebook.from_numpy(genes, rounds, channels, data)
def convert_codebook(
    oldbook: Codebook,
    cycles_conv: Dict[int, int],
    channels_conv: List[Dict[int, int]],
) -> Codebook:
    """Re-express a codebook in a different round/channel layout.

    Parameters
    ----------
    oldbook : Codebook
        Source codebook. Its ``.data`` is indexed as [target][round][channel]
        and its ``coords["target"]`` supplies the target names of the result.
    cycles_conv : Dict[int, int]
        Maps each new round index to the old round index it came from. Its
        length is the number of rounds in the new codebook.
    channels_conv : List[Dict[int, int]]
        Each dict maps new channel indices to old channel indices. The
        position of a dict in the list selects which of the new rounds
        mapped to a given old round receives the code. The number of new
        channels is ``len(channels_conv[0])``.

    Returns
    -------
    Codebook :
        Codebook built with ``Codebook.from_numpy`` from an integer array of
        shape (targets, new rounds, new channels) holding 1 at each converted
        position and 0 elsewhere.

    Raises
    ------
    IndexError
        If no dict in ``channels_conv`` maps to the old channel found for a
        round, or ``cycles_conv`` lacks enough new rounds for an old round.
    """
    raw = oldbook.data
    targets = np.shape(raw)[0]
    rounds = len(cycles_conv)
    channels = len(channels_conv[0])

    # Must start zero-filled: only the "on" positions are written below, and
    # np.empty would leave every other cell holding arbitrary memory contents.
    new_data = np.zeros((targets, rounds, channels), dtype=int)

    for t in range(targets):
        for pr in range(len(raw[t])):
            # The conversion dicts map new -> old, so each lookup below is a
            # reverse search for the keys whose value equals the old index.
            # NOTE(review): argmax returns 0 for an all-zero round, so such a
            # round is treated as old channel 0 - confirm this is intended.
            pchannel = np.argmax(raw[t][pr])
            sub_channel = [
                [tch for tch, pch in subchannel.items() if pch == pchannel]
                for subchannel in channels_conv
            ]
            # Pick the dict with the most new channels mapped to pchannel
            # (the first such dict on ties).
            sub_round = np.argmax([len(per_round) for per_round in sub_channel])
            tchannel = sub_channel[sub_round][0]
            # Among the new rounds derived from old round pr, take the one at
            # the same position as the chosen dict.
            tround = [
                tr for tr, pround in cycles_conv.items() if pround == pr
            ][sub_round]
            new_data[t][tround][tchannel] = 1

    return Codebook.from_numpy(
        oldbook.coords["target"].data, rounds, channels, new_data
    )
def blank_codebook(real_codebook, num_blanks):
    """
    Extend a codebook of real codes with randomly chosen blank codes that
    follow the hamming distance > 1 rule.

    Every possible barcode is enumerated. Candidates that match a real code
    once some single round is ignored are discarded, and the remainder is
    thinned greedily until no two surviving candidates match each other once
    a single round is ignored. The survivors are named "blank<i>" by their
    position, and a random sample of num_blanks of them (all of them if fewer
    exist) is concatenated onto real_codebook along the "target" dimension.
    """
    roundsN = len(real_codebook["r"])
    channelsN = len(real_codebook["c"])

    def _row_view(codes):
        # Hand the rows of per-round channel indices to _view_row_as_element
        # so that whole rows can be compared with a single ==.
        codes.values = np.ascontiguousarray(codes.values)
        return _view_row_as_element(codes.values.reshape(codes.shape[0], -1))

    def _views_without_each_round(codes):
        # Map every round r to the row view of `codes` with round r left out.
        # Two codes are within hamming distance 1 exactly when they agree in
        # at least one of these reduced views.
        views = {}
        for skipped in range(roundsN):
            keep_rounds = [rnd != skipped for rnd in range(roundsN)]
            views[skipped] = _row_view(codes.sel(r=keep_rounds))
        return views

    # Start from the set of all possible codes.
    zero_block = np.zeros((channelsN**roundsN, roundsN, channelsN))
    allCombo = xr.zeros_like(
        xr.DataArray(zero_block, dims=["target", "r", "c"])
    )
    barcode = [0] * roundsN
    for slot in range(np.shape(allCombo)[0]):
        allCombo[slot] = barcodeConv(barcode, channelsN)
        barcode = incrBarcode(barcode, channelsN)

    # Keep only candidates with hamming distance > 1 to every real code.
    real_views = _views_without_each_round(
        real_codebook.argmax(Axes.CH.value)
    )
    candidate_views = _views_without_each_round(
        allCombo.argmax(Axes.CH.value)
    )
    candidate_count = len(_row_view(allCombo.argmax(Axes.CH.value)))
    far_from_real = [
        idx
        for idx in range(candidate_count)
        if not any(
            np.any(candidate_views[r][idx] == real_views[r])
            for r in range(roundsN)
        )
    ]
    allCombo = allCombo[far_from_real]

    # Greedily thin the candidates so that they are also more than hamming
    # distance 1 from each other: walk the list in order and, for the code at
    # the cursor, discard every other code that matches it in some reduced
    # view. Codes before the cursor have already been cleared of clashes.
    candidate_views = _views_without_each_round(
        allCombo.argmax(Axes.CH.value)
    )
    combo_codes = _row_view(allCombo.argmax(Axes.CH.value))
    cursor = 0
    while cursor < len(combo_codes):
        clashing = set()
        for r in range(roundsN):
            matches = candidate_views[r][cursor] == candidate_views[r]
            clashing.update(np.nonzero(matches)[0])
        # The code always matches itself; it is the one we keep.
        clashing.remove(cursor)
        survivors = [
            pos for pos in range(len(combo_codes)) if pos not in clashing
        ]
        combo_codes = combo_codes[survivors]
        for r in range(roundsN):
            candidate_views[r] = candidate_views[r][survivors]
        cursor += 1

    # One-hot encode the surviving codes and wrap them in a Codebook.
    blanks = np.zeros((len(combo_codes), roundsN, channelsN))
    for code_idx, code in enumerate(combo_codes):
        for round_idx, channel in enumerate(code[0]):
            blanks[code_idx, round_idx, channel] = 1
    blank_names = ["blank" + str(n) for n in range(len(blanks))]
    blank_book = Codebook.from_numpy(
        code_names=blank_names,
        n_round=roundsN,
        n_channel=channelsN,
        data=blanks,
    )

    # Combine the requested number of blanks (capped at what is available)
    # with the real codebook.
    num_blanks = min(num_blanks, len(blanks))
    chosen = random.sample(range(len(blanks)), num_blanks)
    return xr.concat([real_codebook, blank_book[chosen]], "target")