def export_fcs(self, source='xform', subsample=False, filename=None, directory=None):
    """
    Export event data to a new FCS file.

    :param source: 'raw', 'comp', 'xform' for whether the raw, compensated
        or transformed events are used for exporting
    :param subsample: Whether to export all events or just the sub-sampled
        events. Default is False (all events).
    :param filename: Text string to use for the exported file name. If None,
        the FCS file's original file name will be used (if present).
    :param directory: Directory path where the FCS file will be saved
    :return: None
    """
    if self.original_filename is None and filename is None:
        raise ValueError(
            "Sample has no original filename, please provide a 'filename' argument"
        )
    elif filename is None:
        filename = self.original_filename

    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = self.subsample_indices
    else:
        idx = np.arange(self.event_count)

    if source == 'xform':
        events = self._transformed_events[idx, :]
    elif source == 'comp':
        events = self._comp_events[idx, :]
    elif source == 'raw':
        events = self._raw_events[idx, :]
    else:
        raise ValueError("source must be one of 'raw', 'comp', or 'xform'")

    fh = open(output_path, 'wb')

    flowio.create_fcs(
        events.flatten().tolist(),
        channel_names=self.pnn_labels,
        opt_channel_names=self.pns_labels,
        file_handle=fh
    )

    fh.close()
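# Usage sketch (illustrative only): assuming `sample` is an instance of the class this
# method belongs to, loaded from an FCS file, the call below would write the raw events
# to a new FCS file. The file name and directory are hypothetical.
#
#     sample.export_fcs(source='raw', filename='sample_raw.fcs', directory='exports')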
def test_create_fcs(self):
    event_data = self.flow_data.events
    channel_names = self.flow_data.channels
    pnn_labels = [v['PnN'] for k, v in channel_names.items()]

    export_file_path = "examples/fcs_files/test_fcs_export.fcs"
    fh = open(export_file_path, 'wb')
    create_fcs(event_data, channel_names=pnn_labels, file_handle=fh)
    fh.close()

    exported_flow_data = FlowData(export_file_path)
    os.unlink(export_file_path)

    self.assertIsInstance(exported_flow_data, FlowData)
def main(args):
    """Main function to perform FCS file combination"""
    fcs_info = pd.read_csv(args.fcs_info_file, sep=',')
    fcs_info_grouped = fcs_info.groupby(by=[args.individual_name])
    markers = args.marker.strip('\n').split(',')
    opt_markers = args.opt_marker.strip('\n').split(',')
    os.makedirs(args.out_dir, exist_ok=True)

    print("Start writing the fcs files")
    combiner = MpCombiner(args)
    pool = multiprocessing.Pool(args.nprocs)
    tasks = pool.imap(combiner.combine, fcs_info_grouped)

    count = 0
    with tqdm(range(len(fcs_info_grouped))) as t:
        for ind_name, events in tasks:
            out_fcs_name = f"{str(ind_name).zfill(4)}.FCS"
            with open(os.path.join(args.out_dir, out_fcs_name), 'wb') as fh:
                flowio.create_fcs(
                    events.flatten().tolist(),
                    channel_names=markers,
                    opt_channel_names=opt_markers,
                    file_handle=fh
                )
            count += 1
            t.update()

    pool.close()
    pool.join()

    # make sure the number of processed individuals is correct
    assert len(fcs_info_grouped) == count

    # write meta file
    meta_file_path = os.path.join(args.out_dir, 'sample_with_labels.csv')
    FCS_file, label, ind = [], [], []
    for ind_name, df in fcs_info_grouped:
        FCS_file.append(f"{str(ind_name).zfill(4)}.FCS")
        ind.append(f"{str(ind_name)}")
        label.append(df[args.label_name].values[0])

    meta_df = pd.DataFrame({
        'FCS_file': FCS_file,
        'Individual': ind,
        'Condition': label
    })
    meta_df.to_csv(meta_file_path, index=False)
    print("Combining fcs files finished")
def writefcs(path, channels):
    """Create and write an FCS file with channels specified by the channels dictionary.

    The dictionary should contain PnN labels matched with event data. The event data
    is assumed to be synced across all channels, i.e. event 1 will be the first element
    of each channel's event data, event 2 the second element, and so on. The channel
    data should be array-likes of floats, all of the same length.
    """
    pnn_labels = list(channels.keys())
    events = zip(*[channels[label] for label in pnn_labels])
    flattened_events = [data for event in events for data in event]
    with open(path, 'wb') as outfile:
        create_fcs(flattened_events, pnn_labels, outfile)
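# Usage sketch (illustrative): two channels, three events each. The output path and
# channel labels below are hypothetical.
#
#     writefcs(
#         'example_output.fcs',
#         {
#             'FSC-A': [1024.0, 2048.0, 512.0],
#             'SSC-A': [256.0, 128.0, 640.0],
#         },
#     )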
def write_fcs(fcs_file, marker_map, out_file, downsampling=None, clust_id=None,
              co_factor=5, jac_std=0.15, seed=12345):
    f = flowio.FlowData(fcs_file)
    for i in range(1, f.channel_count + 1):
        key = str(i)
        if 'PnS' in f.channels[key] and f.channels[key]['PnS'] != u' ':
            f.channels[key]['PnS'] = marker_map[key]
        elif 'PnN' in f.channels[key] and f.channels[key]['PnN'] != u' ':
            f.channels[key]['PnN'] = marker_map[key]
        else:
            raise NotImplementedError(
                "Neither 'PnS' nor 'PnN' is a key in the .fcs file channels")

    if downsampling:
        if 0 < downsampling < 1:
            if clust_id is not None:
                clust_id = [int(x) for x in clust_id.split(',')]
            else:
                clust_id = [i for i in range(f.channel_count)]
            downsampled_events = down_sampling(f, downsampling, clust_id,
                                               co_factor, jac_std, seed)
        else:
            downsampled_events = random_sampling(f, downsampling)

        fh = open(out_file, 'wb')
        flowio.create_fcs(
            downsampled_events.flatten().tolist(),
            channel_names=[chn['PnN'] for _, chn in f.channels.items()],
            opt_channel_names=[chn['PnS'] for _, chn in f.channels.items()],
            file_handle=fh)
        fh.close()
    else:
        f.write_fcs(out_file)
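# Usage sketch (illustrative): rename channels via a marker map keyed by channel number
# and keep 20% of the events. Assumes the down_sampling/random_sampling helpers referenced
# above are defined elsewhere; file names and marker names are hypothetical.
#
#     write_fcs(
#         'input.fcs',
#         marker_map={'1': 'CD3', '2': 'CD4', '3': 'CD8'},
#         out_file='input_renamed_downsampled.fcs',
#         downsampling=0.2,
#     )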
def create_filtered_fcs(fcs_file, results_dir, bad_events):
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    flow_data = flowio.FlowData(fcs_file)
    events = np.reshape(flow_data.events, (-1, flow_data.channel_count))

    good_events = events[np.logical_not(bad_events)]
    bad_events = events[bad_events]

    base_name = os.path.basename(fcs_file)
    good_file_path = os.path.join(results_dir, base_name.replace('.fcs', '_good.fcs'))
    bad_file_path = os.path.join(results_dir, base_name.replace('.fcs', '_bad.fcs'))

    # build channel names
    channel_names = []
    opt_channel_names = []
    for channel in sorted([int(k) for k in flow_data.channels.keys()]):
        channel_names.append(flow_data.channels[str(channel)]['PnN'])
        if 'PnS' in flow_data.channels[str(channel)]:
            opt_channel_names.append(flow_data.channels[str(channel)]['PnS'])
        else:
            opt_channel_names.append(None)

    # build some extra metadata fields
    extra = {}
    acq_date = None
    if 'date' in flow_data.text:
        acq_date = flow_data.text['date']
    if 'timestep' in flow_data.text:
        extra['TIMESTEP'] = flow_data.text['timestep']
    if 'btim' in flow_data.text:
        extra['BTIM'] = flow_data.text['btim']
    if 'etim' in flow_data.text:
        extra['ETIM'] = flow_data.text['etim']

    good_fh = open(good_file_path, 'wb')
    bad_fh = open(bad_file_path, 'wb')

    flowio.create_fcs(
        good_events.flatten().tolist(),
        channel_names,
        good_fh,
        date=acq_date,
        extra=extra,
        opt_channel_names=opt_channel_names
    )
    good_fh.close()

    flowio.create_fcs(
        bad_events.flatten().tolist(),
        channel_names,
        bad_fh,
        date=acq_date,
        extra=extra,
        opt_channel_names=opt_channel_names
    )
    bad_fh.close()
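# Usage sketch (illustrative): `bad_events` is expected to be a boolean mask with one
# entry per event. The input path and mask below are hypothetical (e.g. produced by an
# upstream QC step).
#
#     n_events = flowio.FlowData('input.fcs').event_count
#     bad_mask = np.zeros(n_events, dtype=bool)
#     bad_mask[:100] = True  # flag the first 100 events as "bad"
#     create_filtered_fcs('input.fcs', 'qc_results', bad_mask)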
def export(self, filename, source='xform', exclude=None, subsample=False, directory=None):
    """
    Export Sample event data to either a new FCS file or a CSV file. Format determined
    by filename extension.

    :param filename: Text string to use for the exported file name.
    :param source: 'orig', 'raw', 'comp', 'xform' for whether the original (unprocessed),
        raw, compensated or transformed events are used for exporting
    :param exclude: Specifies whether to exclude events. Options are 'good', 'bad',
        or None. 'bad' excludes negative scatter or anomalous events, 'good' excludes
        everything except those "bad" events. Default is None (exports all events)
    :param subsample: Whether to export all events or just the sub-sampled
        events. Default is False (all events).
    :param directory: Directory path where the CSV will be saved
    :return: None
    """
    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = np.zeros(self.event_count, bool)
        idx[self.subsample_indices] = True
    else:
        # include all events to start with
        idx = np.ones(self.event_count, bool)

    if exclude == 'bad':
        idx[self.anomalous_indices] = False
    elif exclude == 'good':
        good_idx = np.zeros(self.event_count, bool)
        good_idx[self.anomalous_indices] = True
        idx = np.logical_and(idx, good_idx)

    if source == 'xform':
        events = self._transformed_events[idx, :]
    elif source == 'comp':
        events = self._comp_events[idx, :]
    elif source == 'raw':
        events = self._raw_events[idx, :]
    elif source == 'orig':
        events = self._orig_events[idx, :]
    else:
        raise ValueError("source must be one of 'orig', 'raw', 'comp', or 'xform'")

    ext = os.path.splitext(filename)[-1]

    if ext == '.csv':
        np.savetxt(
            output_path,
            events,
            delimiter=',',
            header=",".join(self.pnn_labels),
            comments=''
        )
    elif ext == '.fcs':
        fh = open(output_path, 'wb')

        flowio.create_fcs(
            events.flatten().tolist(),
            channel_names=self.pnn_labels,
            opt_channel_names=self.pns_labels,
            file_handle=fh
        )

        fh.close()
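# Usage sketch (illustrative): assuming `sample` is an instance of this class, the file
# extension selects the output format, so the same method writes either FCS or CSV.
# File names and directory are hypothetical.
#
#     sample.export('sample_xform.fcs', source='xform', directory='exports')
#     sample.export('sample_comp.csv', source='comp', exclude='bad', directory='exports')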
def __init__(self, fcs_path_or_data, channel_labels=None, compensation=None,
             null_channel_list=None):
    """
    Create a Sample instance
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(fcs_path_or_data.open('rb'))
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    elif isinstance(fcs_path_or_data, pd.DataFrame):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.values.flatten().tolist(),
            channel_names=fcs_path_or_data.columns,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count
    self.channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()

    channel_gain = []
    channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    time_index = None

    for n in sorted([int(k) for k in self.channels.keys()]):
        chan_label = self.channels[str(n)]['PnN']
        self.pnn_labels.append(chan_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        if 'p%dr' % n in self.metadata:
            channel_range.append(float(self.metadata['p%dr' % n]))
        else:
            channel_range.append(None)

        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            channel_lin_log.append((decades, log0))
        else:
            channel_lin_log.append((0.0, 0.0))

        if chan_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)
        elif chan_label.lower() == 'time':
            time_index = n - 1

        if 'PnS' in self.channels[str(n)]:
            self.pns_labels.append(self.channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    self._flowjo_pnn_labels = [
        label.replace('/', '_') for label in self.pnn_labels
    ]

    # Raw events need to be scaled according to channel gain, as well
    # as corrected for proper lin/log display.
    # This is the only pre-processing we will do on raw events
    raw_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    if 'timestep' in self.metadata and time_index is not None:
        time_step = float(self.metadata['timestep'])
        raw_events[:, time_index] = raw_events[:, time_index] * time_step

    # But first, we'll save the unprocessed events
    self._orig_events = raw_events.copy()

    for i, (decades, log0) in enumerate(channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None
    self.transform = None
    self._subsample_count = None
    self._subsample_seed = None

    if compensation is not None:
        self.apply_compensation(compensation)

    # if filtering any events, save those in case they want to be retrieved
    self.negative_scatter_indices = None
    self.anomalous_indices = None
    self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    # TODO: Allow user to set some sort of Sample ID or the orig filename,
    #       would be useful for Samples created from data arrays or if
    #       2 FCS files had the same file name.
    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None
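# Usage sketch (illustrative): building a Sample directly from a NumPy array routes the
# data through flowio.create_fcs into a temporary FCS file. The channel labels and
# values below are made up.
#
#     event_array = np.random.random((100, 3)) * 1024
#     sample = Sample(event_array, channel_labels=['FSC-A', 'SSC-A', 'FL1-A'])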
def __init__(
        self,
        fcs_path_or_data,
        channel_labels=None,
        compensation=None,
        subsample_count=10000,
        random_seed=1,
        filter_negative_scatter=False,
        filter_anomalous_events=False,
        null_channel_list=None
):
    """
    Create a Sample instance

    :param fcs_path_or_data: FCS data, can be either:
            - a file path or file handle to an FCS file
            - a pathlib Path object
            - a FlowIO FlowData object
            - a NumPy array of FCS event data (must provide channel_labels)
            - a Pandas DataFrame containing FCS event data (channel labels as headers)
    :param channel_labels: A list of strings or a list of tuples to use for the channel
        labels. Required if fcs_path_or_data is a NumPy array
    :param compensation: Compensation matrix. Can be either:
            - a text string in CSV or TSV format
            - a string path to a CSV or TSV file
            - a pathlib Path object to a CSV or TSV file
    :param subsample_count: Number of events to use as a sub-sample. If None, then no
        sub-sampling is performed. If the number of events in the Sample is less than
        the requested sub-sample count, then the maximum number of available events is
        used for the sub-sample.
    :param random_seed: Random seed used for sub-sampling events
    :param filter_negative_scatter: If True, negative scatter events are omitted from
        the sub-sample. Only used for sub-sampling.
    :param filter_anomalous_events: If True, anomalous events are omitted from the
        sub-sample. Anomalous events are determined via Kolmogorov-Smirnov statistical
        test performed on each channel. The reference distribution is chosen based on
        the difference from the median.
    :param null_channel_list: List of PnN labels for channels that were collected but
        do not contain useful data. Note, this should only be used if there were truly
        no fluorochromes used targeting those detectors and the channels do not
        contribute to compensation.
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(fcs_path_or_data.open('rb'))
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count
    self.channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()

    channel_gain = []
    channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    for n in sorted([int(k) for k in self.channels.keys()]):
        chan_label = self.channels[str(n)]['PnN']
        self.pnn_labels.append(chan_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        if 'p%dr' % n in self.metadata:
            channel_range.append(float(self.metadata['p%dr' % n]))
        else:
            channel_range.append(None)

        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            channel_lin_log.append((decades, log0))
        else:
            channel_lin_log.append((0.0, 0.0))

        if chan_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)

        if 'PnS' in self.channels[str(n)]:
            self.pns_labels.append(self.channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    # Raw events need to be scaled according to channel gain, as well
    # as corrected for proper lin/log display.
    # These are the only pre-processing steps we will do on raw events
    raw_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    # But first, we'll save the unprocessed events
    self._orig_events = raw_events.copy()

    for i, (decades, log0) in enumerate(channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self.transform = None
    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None

    self.apply_compensation(compensation)

    # if filtering anomalous events, save those in case they want to be retrieved
    self.anomalous_indices = None

    # Save sub-sampled indices if requested
    if subsample_count is not None:
        self.subsample_indices = self._generate_subsample(
            subsample_count,
            random_seed,
            filter_negative_scatter=filter_negative_scatter,
            filter_anomalous_events=filter_anomalous_events  # will store anomalous events
        )
    else:
        self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None
def export(self, filename, source='xform', exclude_neg_scatter=False, exclude_flagged=False,
           exclude_normal=False, subsample=False, directory=None):
    """
    Export Sample event data to either a new FCS file or a CSV file. Format determined
    by filename extension.

    :param filename: Text string to use for the exported file name. File type is
        determined by the filename extension (supported types are .fcs & .csv).
    :param source: 'orig', 'raw', 'comp', 'xform' for whether the original
        (no gain applied), raw (orig + gain), compensated (raw + comp), or
        transformed (comp + xform) events are used for exporting
    :param exclude_neg_scatter: Whether to exclude negative scatter events. Default is False.
    :param exclude_flagged: Whether to exclude flagged events. Default is False.
    :param exclude_normal: Whether to exclude "normal" events. This is useful for
        retrieving all the "bad" events (neg scatter and/or flagged events).
        Default is False.
    :param subsample: Whether to export all events or just the sub-sampled events.
        Default is False (all events).
    :param directory: Directory path where the exported file will be saved. If None,
        the file will be saved in the current working directory.
    :return: None
    """
    # get the requested file type (either .fcs or .csv)
    ext = os.path.splitext(filename)[-1].lower()

    # Next, check if exporting as CSV, and issue a warning if so.
    # Exporting original events to CSV doesn't allow for the
    # inclusion of the proper metadata (PnG, PnE, PnR) for the
    # exported event values to be interpreted correctly.
    if ext == '.csv' and source == 'orig':
        warnings.warn(
            "Exporting original events as CSV will not include the metadata "
            "(gain, timestep, etc.) to properly interpret the exported event values."
        )

    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = np.zeros(self.event_count, bool)
        idx[self.subsample_indices] = True
    else:
        # include all events to start with
        idx = np.ones(self.event_count, bool)

    if exclude_flagged:
        idx[self.flagged_indices] = False
    if exclude_neg_scatter:
        idx[self.negative_scatter_indices] = False
    if exclude_normal:
        # start with all events marked normal
        normal_idx = np.ones(self.event_count, bool)

        # set neg scatter and flagged events to False
        normal_idx[self.negative_scatter_indices] = False
        normal_idx[self.flagged_indices] = False

        # then filter out the inverse normal indices
        idx = np.logical_and(idx, ~normal_idx)

    extra_dict = {}
    events = self.get_events(source=source)
    events = events[idx, :]

    if source == 'orig':
        if 'timestep' in self.metadata and self.time_index is not None:
            extra_dict['TIMESTEP'] = self.metadata['timestep']

        # check channel scale for each channel, log scale is not supported yet by FlowIO
        for (decades, log0) in self.channel_lin_log:
            if decades > 0:
                raise NotImplementedError(
                    "Export of FCS files with original events containing PnE instructions "
                    "for log scale is not yet supported"
                )

        # check for channel gain values != 1.0
        for _, channel_row in self.channels.iterrows():
            gain_keyword = 'p%dg' % channel_row['channel_number']
            gain_value = channel_row['png']

            if gain_value != 1.0:
                extra_dict[gain_keyword] = gain_value

    # TODO: support exporting to HDF5 format, but as optional dependency/import
    if ext == '.csv':
        np.savetxt(
            output_path,
            events,
            delimiter=',',
            header=",".join(self.pnn_labels),
            comments=''
        )
    elif ext == '.fcs':
        fh = open(output_path, 'wb')

        flowio.create_fcs(
            events.flatten().tolist(),
            channel_names=self.pnn_labels,
            opt_channel_names=self.pns_labels,
            file_handle=fh,
            extra=extra_dict
        )

        fh.close()
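# Usage sketch (illustrative): assuming `sample` is an instance of this class and has
# been through upstream QC, the calls below would export only the events passing QC,
# or only the "bad" ones. File names are hypothetical.
#
#     sample.export('clean_events.fcs', source='raw',
#                   exclude_neg_scatter=True, exclude_flagged=True)
#     sample.export('bad_events.csv', source='raw', exclude_normal=True)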
def __init__(self, fcs_path_or_data, channel_labels=None, compensation=None,
             null_channel_list=None, ignore_offset_error=False,
             cache_original_events=False, subsample=10000):
    """
    Create a Sample instance
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(
            fcs_path_or_data,
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(
            fcs_path_or_data,
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(
            fcs_path_or_data.open('rb'),
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    elif isinstance(fcs_path_or_data, pd.DataFrame):
        tmp_file = TemporaryFile()

        # Handle MultiIndex columns since that is what the as_dataframe method creates.
        if fcs_path_or_data.columns.nlevels > 1:
            pnn_labels = fcs_path_or_data.columns.get_level_values(0)
            pns_labels = fcs_path_or_data.columns.get_level_values(1)
        else:
            pnn_labels = fcs_path_or_data.columns
            pns_labels = None

        flowio.create_fcs(
            fcs_path_or_data.values.flatten().tolist(),
            channel_names=pnn_labels,
            file_handle=tmp_file,
            opt_channel_names=pns_labels
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count

    # make a temp channels dict, self.channels will be a DataFrame built from it
    tmp_channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()
    self.scatter_indices = list()
    self.time_index = None

    channel_gain = []
    self.channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    for n in sorted([int(k) for k in tmp_channels.keys()]):
        channel_label = tmp_channels[str(n)]['PnN']
        self.pnn_labels.append(channel_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        # PnR range values are required for all channels
        channel_range.append(float(self.metadata['p%dr' % n]))

        # PnE specifies whether the parameter data is stored on a linear or log scale
        # and includes 2 values: (f1, f2)
        # where:
        #     f1 is the number of log decades (valid values are f1 >= 0)
        #     f2 is the value to use for log(0) (valid values are f2 >= 0)
        # Note for log scale, both values must be > 0
        # linear = (0, 0)
        # log = (f1 > 0, f2 > 0)
        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            self.channel_lin_log.append((decades, log0))
        else:
            self.channel_lin_log.append((0.0, 0.0))

        if channel_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)
        elif channel_label.lower()[:4] in ['fsc-', 'ssc-']:
            self.scatter_indices.append(n - 1)
        elif channel_label.lower() == 'time':
            self.time_index = n - 1

        if 'PnS' in tmp_channels[str(n)]:
            self.pns_labels.append(tmp_channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    self._flowjo_pnn_labels = [
        label.replace('/', '_') for label in self.pnn_labels
    ]

    # build the self.channels DataFrame
    self.channels = pd.DataFrame()
    self.channels['channel_number'] = sorted([int(k) for k in tmp_channels.keys()])
    self.channels['pnn'] = self.pnn_labels
    self.channels['pns'] = self.pns_labels
    self.channels['png'] = channel_gain
    self.channels['pnr'] = channel_range

    # Start processing the event data. First, we'll get the unprocessed events.
    # This is the main entry point for event data into FlowKit, and we ensure
    # the events are double precision because of the pre-processing that will
    # make even integer FCS data types floating point. The precision is needed
    # for accurate gating results.
    tmp_orig_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    if cache_original_events:
        self._orig_events = tmp_orig_events
    else:
        self._orig_events = None

    # Event data must be scaled according to channel gain, as well
    # as corrected for proper lin/log display, and the time channel
    # scaled by the 'timestep' keyword value (if present).
    # This is the only pre-processing we will do on raw events
    raw_events = copy.deepcopy(tmp_orig_events)

    # Note: The time channel is scaled by the timestep (if present),
    #       but should not be scaled by any gain value present in PnG.
    #       It seems common for cytometers to include a gain value for the
    #       time channel that matches the fluoro channels. Not sure why
    #       they do this but it makes no sense to have an amplifier gain
    #       on the time data. Here, we set any time gain to 1.0.
    if self.time_index is not None:
        channel_gain[self.time_index] = 1.0

    if 'timestep' in self.metadata and self.time_index is not None:
        time_step = float(self.metadata['timestep'])
        raw_events[:, self.time_index] = raw_events[:, self.time_index] * time_step

    for i, (decades, log0) in enumerate(self.channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None
    self.transform = None
    self._subsample_count = None
    self._subsample_seed = None
    self._include_scatter_option = False  # stores user option from transform_events method

    if compensation is not None:
        self.apply_compensation(compensation)

    # if filtering any events, save those in case they want to be retrieved
    self.negative_scatter_indices = None
    self.flagged_indices = None
    self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    # TODO: Allow user to set some sort of Sample ID or the orig filename,
    #       would be useful for Samples created from data arrays or if
    #       2 FCS files had the same file name.
    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None

    # finally, store initial sub-sampled event indices
    self.subsample_events(subsample)
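# Usage sketch (illustrative): constructing a Sample from a pandas DataFrame whose
# MultiIndex columns carry (PnN, PnS) labels; all values and labels below are made up.
#
#     cols = pd.MultiIndex.from_tuples([('FL1-A', 'CD3'), ('FL2-A', 'CD19')])
#     df = pd.DataFrame(np.random.random((100, 2)) * 1024, columns=cols)
#     sample = Sample(df, cache_original_events=True, subsample=50)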