def export_fcs(self, source='xform', subsample=False, filename=None, directory=None):
    """
    Export event data to a new FCS file.

    :param source: 'raw', 'comp', 'xform' for whether the raw, compensated
        or transformed events are used for exporting
    :param subsample: Whether to export all events or just the sub-sampled
        events. Default is False (all events).
    :param filename: Text string to use for the exported file name. If None,
        the FCS file's original file name will be used (if present).
    :param directory: Directory path where the FCS file will be saved
    :return: None
    """
    if self.original_filename is None and filename is None:
        raise ValueError(
            "Sample has no original filename, please provide a 'filename' argument"
        )
    elif filename is None:
        filename = self.original_filename

    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = self.subsample_indices
    else:
        idx = np.arange(self.event_count)

    if source == 'xform':
        events = self._transformed_events[idx, :]
    elif source == 'comp':
        events = self._comp_events[idx, :]
    elif source == 'raw':
        events = self._raw_events[idx, :]
    else:
        raise ValueError("source must be one of 'raw', 'comp', or 'xform'")

    fh = open(output_path, 'wb')

    flowio.create_fcs(
        events.flatten().tolist(),
        channel_names=self.pnn_labels,
        opt_channel_names=self.pns_labels,
        file_handle=fh
    )

    fh.close()
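# Usage sketch (illustrative only): assuming `sample` is an instance of the class this
# method belongs to, loaded from an FCS file, the call below would write the raw events
# to a new FCS file. The file name and directory are hypothetical.
#
#     sample.export_fcs(source='raw', filename='sample_raw.fcs', directory='exports')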
def test_create_fcs(self):
    event_data = self.flow_data.events
    channel_names = self.flow_data.channels
    pnn_labels = [v['PnN'] for k, v in channel_names.items()]

    export_file_path = "examples/fcs_files/test_fcs_export.fcs"
    fh = open(export_file_path, 'wb')
    create_fcs(event_data, channel_names=pnn_labels, file_handle=fh)
    fh.close()

    exported_flow_data = FlowData(export_file_path)
    os.unlink(export_file_path)

    self.assertIsInstance(exported_flow_data, FlowData)
def main(args):
    """Main function to perform FCS file combination"""
    fcs_info = pd.read_csv(args.fcs_info_file, sep=',')
    fcs_info_grouped = fcs_info.groupby(by=[args.individual_name])
    markers = args.marker.strip('\n').split(',')
    opt_markers = args.opt_marker.strip('\n').split(',')
    os.makedirs(args.out_dir, exist_ok=True)

    print("Start writing the fcs files")
    combiner = MpCombiner(args)
    pool = multiprocessing.Pool(args.nprocs)
    tasks = pool.imap(combiner.combine, fcs_info_grouped)

    count = 0
    with tqdm(range(len(fcs_info_grouped))) as t:
        for ind_name, events in tasks:
            out_fcs_name = f"{str(ind_name).zfill(4)}.FCS"
            with open(os.path.join(args.out_dir, out_fcs_name), 'wb') as fh:
                flowio.create_fcs(
                    events.flatten().tolist(),
                    channel_names=markers,
                    opt_channel_names=opt_markers,
                    file_handle=fh
                )
            count += 1
            t.update()

    pool.close()
    pool.join()

    # make sure the number of processed individuals is correct
    assert len(fcs_info_grouped) == count

    # write meta file
    meta_file_path = os.path.join(args.out_dir, 'sample_with_labels.csv')
    FCS_file, label, ind = [], [], []
    for ind_name, df in fcs_info_grouped:
        FCS_file.append(f"{str(ind_name).zfill(4)}.FCS")
        ind.append(f"{str(ind_name)}")
        label.append(df[args.label_name].values[0])

    meta_df = pd.DataFrame({
        'FCS_file': FCS_file,
        'Individual': ind,
        'Condition': label
    })
    meta_df.to_csv(meta_file_path, index=False)
    print("Combining fcs files finished")
def writefcs(path, channels):
    """Create and write an FCS file with channels specified by the channels dictionary.

    The dictionary should contain PnN labels matched with event data. The event data
    is assumed to be synced across all channels, i.e. event 1 will be the first element
    of each channel's event data, event 2 the second element, and so on. The channel
    data should be array-likes of floats, all of the same length.
    """
    pnn_labels = list(channels.keys())
    events = zip(*[channels[label] for label in pnn_labels])
    flattened_events = [data for event in events for data in event]
    with open(path, 'wb') as outfile:
        create_fcs(flattened_events, pnn_labels, outfile)
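# Usage sketch (illustrative): two channels, three events each. The output path and
# channel labels below are hypothetical.
#
#     writefcs(
#         'example_output.fcs',
#         {
#             'FSC-A': [1024.0, 2048.0, 512.0],
#             'SSC-A': [256.0, 128.0, 640.0],
#         },
#     )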
def write_fcs(fcs_file, marker_map, out_file, downsampling=None, clust_id=None,
              co_factor=5, jac_std=0.15, seed=12345):
    f = flowio.FlowData(fcs_file)
    for i in range(1, f.channel_count + 1):
        key = str(i)
        if 'PnS' in f.channels[key] and f.channels[key]['PnS'] != u' ':
            f.channels[key]['PnS'] = marker_map[key]
        elif 'PnN' in f.channels[key] and f.channels[key]['PnN'] != u' ':
            f.channels[key]['PnN'] = marker_map[key]
        else:
            raise NotImplementedError(
                "Neither 'PnS' nor 'PnN' is a key in the .fcs file channels")

    if downsampling:
        if 0 < downsampling < 1:
            if clust_id is not None:
                clust_id = [int(x) for x in clust_id.split(',')]
            else:
                clust_id = [i for i in range(f.channel_count)]
            downsampled_events = down_sampling(f, downsampling, clust_id,
                                               co_factor, jac_std, seed)
        else:
            downsampled_events = random_sampling(f, downsampling)

        fh = open(out_file, 'wb')
        flowio.create_fcs(
            downsampled_events.flatten().tolist(),
            channel_names=[chn['PnN'] for _, chn in f.channels.items()],
            opt_channel_names=[chn['PnS'] for _, chn in f.channels.items()],
            file_handle=fh)
        fh.close()
    else:
        f.write_fcs(out_file)
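# Usage sketch (illustrative): rename channels via a marker map keyed by channel number
# and keep 20% of the events. Assumes the down_sampling/random_sampling helpers referenced
# above are defined elsewhere; file names and marker names are hypothetical.
#
#     write_fcs(
#         'input.fcs',
#         marker_map={'1': 'CD3', '2': 'CD4', '3': 'CD8'},
#         out_file='input_renamed_downsampled.fcs',
#         downsampling=0.2,
#     )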
def create_filtered_fcs(fcs_file, results_dir, bad_events):
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    flow_data = flowio.FlowData(fcs_file)
    events = np.reshape(flow_data.events, (-1, flow_data.channel_count))

    good_events = events[np.logical_not(bad_events)]
    bad_events = events[bad_events]

    base_name = os.path.basename(fcs_file)
    good_file_path = os.path.join(results_dir, base_name.replace('.fcs', '_good.fcs'))
    bad_file_path = os.path.join(results_dir, base_name.replace('.fcs', '_bad.fcs'))

    # build channel names
    channel_names = []
    opt_channel_names = []
    for channel in sorted([int(k) for k in flow_data.channels.keys()]):
        channel_names.append(flow_data.channels[str(channel)]['PnN'])
        if 'PnS' in flow_data.channels[str(channel)]:
            opt_channel_names.append(flow_data.channels[str(channel)]['PnS'])
        else:
            opt_channel_names.append(None)

    # build some extra metadata fields
    extra = {}
    acq_date = None
    if 'date' in flow_data.text:
        acq_date = flow_data.text['date']
    if 'timestep' in flow_data.text:
        extra['TIMESTEP'] = flow_data.text['timestep']
    if 'btim' in flow_data.text:
        extra['BTIM'] = flow_data.text['btim']
    if 'etim' in flow_data.text:
        extra['ETIM'] = flow_data.text['etim']

    good_fh = open(good_file_path, 'wb')
    bad_fh = open(bad_file_path, 'wb')

    flowio.create_fcs(
        good_events.flatten().tolist(),
        channel_names,
        good_fh,
        date=acq_date,
        extra=extra,
        opt_channel_names=opt_channel_names
    )
    good_fh.close()

    flowio.create_fcs(
        bad_events.flatten().tolist(),
        channel_names,
        bad_fh,
        date=acq_date,
        extra=extra,
        opt_channel_names=opt_channel_names
    )
    bad_fh.close()
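# Usage sketch (illustrative): `bad_events` is expected to be a boolean mask with one
# entry per event. The input path and mask below are hypothetical (e.g. produced by an
# upstream QC step).
#
#     n_events = flowio.FlowData('input.fcs').event_count
#     bad_mask = np.zeros(n_events, dtype=bool)
#     bad_mask[:100] = True  # flag the first 100 events as "bad"
#     create_filtered_fcs('input.fcs', 'qc_results', bad_mask)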
def export(self, filename, source='xform', exclude=None, subsample=False, directory=None):
    """
    Export Sample event data to either a new FCS file or a CSV file. Format determined
    by filename extension.

    :param filename: Text string to use for the exported file name.
    :param source: 'orig', 'raw', 'comp', 'xform' for whether the original (unprocessed),
        raw, compensated or transformed events are used for exporting
    :param exclude: Specifies whether to exclude events. Options are 'good', 'bad',
        or None. 'bad' excludes negative scatter or anomalous events, 'good' excludes
        everything except those "bad" events. Default is None (exports all events)
    :param subsample: Whether to export all events or just the sub-sampled
        events. Default is False (all events).
    :param directory: Directory path where the CSV will be saved
    :return: None
    """
    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = np.zeros(self.event_count, bool)
        idx[self.subsample_indices] = True
    else:
        # include all events to start with
        idx = np.ones(self.event_count, bool)

    if exclude == 'bad':
        idx[self.anomalous_indices] = False
    elif exclude == 'good':
        good_idx = np.zeros(self.event_count, bool)
        good_idx[self.anomalous_indices] = True
        idx = np.logical_and(idx, good_idx)

    if source == 'xform':
        events = self._transformed_events[idx, :]
    elif source == 'comp':
        events = self._comp_events[idx, :]
    elif source == 'raw':
        events = self._raw_events[idx, :]
    elif source == 'orig':
        events = self._orig_events[idx, :]
    else:
        raise ValueError("source must be one of 'orig', 'raw', 'comp', or 'xform'")

    ext = os.path.splitext(filename)[-1]

    if ext == '.csv':
        np.savetxt(
            output_path,
            events,
            delimiter=',',
            header=",".join(self.pnn_labels),
            comments=''
        )
    elif ext == '.fcs':
        fh = open(output_path, 'wb')

        flowio.create_fcs(
            events.flatten().tolist(),
            channel_names=self.pnn_labels,
            opt_channel_names=self.pns_labels,
            file_handle=fh
        )

        fh.close()
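# Usage sketch (illustrative): assuming `sample` is an instance of this class, the file
# extension selects the output format, so the same method writes either FCS or CSV.
# File names and directory are hypothetical.
#
#     sample.export('sample_xform.fcs', source='xform', directory='exports')
#     sample.export('sample_comp.csv', source='comp', exclude='bad', directory='exports')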
def __init__(self, fcs_path_or_data, channel_labels=None, compensation=None,
             null_channel_list=None):
    """
    Create a Sample instance
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(fcs_path_or_data.open('rb'))
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    elif isinstance(fcs_path_or_data, pd.DataFrame):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.values.flatten().tolist(),
            channel_names=fcs_path_or_data.columns,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count
    self.channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()

    channel_gain = []
    channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    time_index = None

    for n in sorted([int(k) for k in self.channels.keys()]):
        chan_label = self.channels[str(n)]['PnN']
        self.pnn_labels.append(chan_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        if 'p%dr' % n in self.metadata:
            channel_range.append(float(self.metadata['p%dr' % n]))
        else:
            channel_range.append(None)

        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            channel_lin_log.append((decades, log0))
        else:
            channel_lin_log.append((0.0, 0.0))

        if chan_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)
        elif chan_label.lower() == 'time':
            time_index = n - 1

        if 'PnS' in self.channels[str(n)]:
            self.pns_labels.append(self.channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    self._flowjo_pnn_labels = [
        label.replace('/', '_') for label in self.pnn_labels
    ]

    # Raw events need to be scaled according to channel gain, as well
    # as corrected for proper lin/log display.
    # This is the only pre-processing we will do on raw events
    raw_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    if 'timestep' in self.metadata and time_index is not None:
        time_step = float(self.metadata['timestep'])
        raw_events[:, time_index] = raw_events[:, time_index] * time_step

    # But first, we'll save the unprocessed events
    self._orig_events = raw_events.copy()

    for i, (decades, log0) in enumerate(channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None
    self.transform = None
    self._subsample_count = None
    self._subsample_seed = None

    if compensation is not None:
        self.apply_compensation(compensation)

    # if filtering any events, save those in case they want to be retrieved
    self.negative_scatter_indices = None
    self.anomalous_indices = None
    self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    # TODO: Allow user to set some sort of Sample ID or the orig filename,
    #       would be useful for Samples created from data arrays or if
    #       2 FCS files had the same file name.
    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None
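# Usage sketch (illustrative): building a Sample directly from a NumPy array routes the
# data through flowio.create_fcs into a temporary FCS file. The channel labels and
# values below are made up.
#
#     event_array = np.random.random((100, 3)) * 1024
#     sample = Sample(event_array, channel_labels=['FSC-A', 'SSC-A', 'FL1-A'])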
def __init__(
        self,
        fcs_path_or_data,
        channel_labels=None,
        compensation=None,
        subsample_count=10000,
        random_seed=1,
        filter_negative_scatter=False,
        filter_anomalous_events=False,
        null_channel_list=None
):
    """
    Create a Sample instance

    :param fcs_path_or_data: FCS data, can be either:
            - a file path or file handle to an FCS file
            - a pathlib Path object
            - a FlowIO FlowData object
            - a NumPy array of FCS event data (must provide channel_labels)
            - a Pandas DataFrame containing FCS event data (channel labels as headers)
    :param channel_labels: A list of strings or a list of tuples to use for the channel
        labels. Required if fcs_path_or_data is a NumPy array
    :param compensation: Compensation matrix. Can be either:
            - a text string in CSV or TSV format
            - a string path to a CSV or TSV file
            - a pathlib Path object to a CSV or TSV file
    :param subsample_count: Number of events to use as a sub-sample. If None, then no
        sub-sampling is performed. If the number of events in the Sample is less than
        the requested sub-sample count, then the maximum number of available events is
        used for the sub-sample.
    :param random_seed: Random seed used for sub-sampling events
    :param filter_negative_scatter: If True, negative scatter events are omitted from
        the sub-sample. Only used for sub-sampling.
    :param filter_anomalous_events: If True, anomalous events are omitted from the
        sub-sample. Anomalous events are determined via Kolmogorov-Smirnov statistical
        test performed on each channel. The reference distribution is chosen based on
        the difference from the median.
    :param null_channel_list: List of PnN labels for channels that were collected but
        do not contain useful data. Note, this should only be used if there were truly
        no fluorochromes used targeting those detectors and the channels do not
        contribute to compensation.
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(fcs_path_or_data)
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(fcs_path_or_data.open('rb'))
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count
    self.channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()

    channel_gain = []
    channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    for n in sorted([int(k) for k in self.channels.keys()]):
        chan_label = self.channels[str(n)]['PnN']
        self.pnn_labels.append(chan_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        if 'p%dr' % n in self.metadata:
            channel_range.append(float(self.metadata['p%dr' % n]))
        else:
            channel_range.append(None)

        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            channel_lin_log.append((decades, log0))
        else:
            channel_lin_log.append((0.0, 0.0))

        if chan_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)

        if 'PnS' in self.channels[str(n)]:
            self.pns_labels.append(self.channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    # Raw events need to be scaled according to channel gain, as well
    # as corrected for proper lin/log display.
    # These are the only pre-processing steps we will do on raw events
    raw_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    # But first, we'll save the unprocessed events
    self._orig_events = raw_events.copy()

    for i, (decades, log0) in enumerate(channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self.transform = None
    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None

    self.apply_compensation(compensation)

    # if filtering anomalous events, save those in case they want to be retrieved
    self.anomalous_indices = None

    # Save sub-sampled indices if requested
    if subsample_count is not None:
        self.subsample_indices = self._generate_subsample(
            subsample_count,
            random_seed,
            filter_negative_scatter=filter_negative_scatter,
            filter_anomalous_events=filter_anomalous_events  # will store anomalous events
        )
    else:
        self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None
def export(self, filename, source='xform', exclude_neg_scatter=False, exclude_flagged=False,
           exclude_normal=False, subsample=False, directory=None):
    """
    Export Sample event data to either a new FCS file or a CSV file. Format determined
    by filename extension.

    :param filename: Text string to use for the exported file name. File type is
        determined by the filename extension (supported types are .fcs & .csv).
    :param source: 'orig', 'raw', 'comp', 'xform' for whether the original
        (no gain applied), raw (orig + gain), compensated (raw + comp), or
        transformed (comp + xform) events are used for exporting
    :param exclude_neg_scatter: Whether to exclude negative scatter events. Default is False.
    :param exclude_flagged: Whether to exclude flagged events. Default is False.
    :param exclude_normal: Whether to exclude "normal" events. This is useful for
        retrieving all the "bad" events (neg scatter and/or flagged events).
        Default is False.
    :param subsample: Whether to export all events or just the sub-sampled events.
        Default is False (all events).
    :param directory: Directory path where the exported file will be saved. If None,
        the file will be saved in the current working directory.
    :return: None
    """
    # get the requested file type (either .fcs or .csv)
    ext = os.path.splitext(filename)[-1].lower()

    # Next, check if exporting as CSV, and issue a warning if so.
    # Exporting original events to CSV doesn't allow for the
    # inclusion of the proper metadata (PnG, PnE, PnR) for the
    # exported event values to be interpreted correctly.
    if ext == '.csv' and source == 'orig':
        warnings.warn(
            "Exporting original events as CSV will not include the metadata "
            "(gain, timestep, etc.) to properly interpret the exported event values."
        )

    if directory is not None:
        output_path = os.path.join(directory, filename)
    else:
        output_path = filename

    if subsample:
        idx = np.zeros(self.event_count, bool)
        idx[self.subsample_indices] = True
    else:
        # include all events to start with
        idx = np.ones(self.event_count, bool)

    if exclude_flagged:
        idx[self.flagged_indices] = False
    if exclude_neg_scatter:
        idx[self.negative_scatter_indices] = False
    if exclude_normal:
        # start with all events marked normal
        normal_idx = np.ones(self.event_count, bool)

        # set neg scatter and flagged events to False
        normal_idx[self.negative_scatter_indices] = False
        normal_idx[self.flagged_indices] = False

        # then filter out the inverse normal indices
        idx = np.logical_and(idx, ~normal_idx)

    extra_dict = {}
    events = self.get_events(source=source)
    events = events[idx, :]

    if source == 'orig':
        if 'timestep' in self.metadata and self.time_index is not None:
            extra_dict['TIMESTEP'] = self.metadata['timestep']

        # check channel scale for each channel, log scale is not supported yet by FlowIO
        for (decades, log0) in self.channel_lin_log:
            if decades > 0:
                raise NotImplementedError(
                    "Export of FCS files with original events containing PnE instructions "
                    "for log scale is not yet supported"
                )

        # check for channel gain values != 1.0
        for _, channel_row in self.channels.iterrows():
            gain_keyword = 'p%dg' % channel_row['channel_number']
            gain_value = channel_row['png']

            if gain_value != 1.0:
                extra_dict[gain_keyword] = gain_value

    # TODO: support exporting to HDF5 format, but as optional dependency/import
    if ext == '.csv':
        np.savetxt(
            output_path,
            events,
            delimiter=',',
            header=",".join(self.pnn_labels),
            comments=''
        )
    elif ext == '.fcs':
        fh = open(output_path, 'wb')

        flowio.create_fcs(
            events.flatten().tolist(),
            channel_names=self.pnn_labels,
            opt_channel_names=self.pns_labels,
            file_handle=fh,
            extra=extra_dict
        )

        fh.close()
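# Usage sketch (illustrative): assuming `sample` is an instance of this class and has
# been through upstream QC, the calls below would export only the events passing QC,
# or only the "bad" ones. File names are hypothetical.
#
#     sample.export('clean_events.fcs', source='raw',
#                   exclude_neg_scatter=True, exclude_flagged=True)
#     sample.export('bad_events.csv', source='raw', exclude_normal=True)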
def __init__(self, fcs_path_or_data, channel_labels=None, compensation=None,
             null_channel_list=None, ignore_offset_error=False,
             cache_original_events=False, subsample=10000):
    """
    Create a Sample instance
    """
    # inspect our fcs_path_or_data argument
    if isinstance(fcs_path_or_data, str):
        # if a string, we only handle file paths, so try creating a FlowData object
        flow_data = flowio.FlowData(
            fcs_path_or_data,
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, io.IOBase):
        flow_data = flowio.FlowData(
            fcs_path_or_data,
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, Path):
        flow_data = flowio.FlowData(
            fcs_path_or_data.open('rb'),
            ignore_offset_error=ignore_offset_error
        )
    elif isinstance(fcs_path_or_data, flowio.FlowData):
        flow_data = fcs_path_or_data
    elif isinstance(fcs_path_or_data, np.ndarray):
        tmp_file = TemporaryFile()
        flowio.create_fcs(
            fcs_path_or_data.flatten().tolist(),
            channel_names=channel_labels,
            file_handle=tmp_file
        )
        flow_data = flowio.FlowData(tmp_file)
    elif isinstance(fcs_path_or_data, pd.DataFrame):
        tmp_file = TemporaryFile()

        # Handle MultiIndex columns since that is what the as_dataframe method creates.
        if fcs_path_or_data.columns.nlevels > 1:
            pnn_labels = fcs_path_or_data.columns.get_level_values(0)
            pns_labels = fcs_path_or_data.columns.get_level_values(1)
        else:
            pnn_labels = fcs_path_or_data.columns
            pns_labels = None

        flowio.create_fcs(
            fcs_path_or_data.values.flatten().tolist(),
            channel_names=pnn_labels,
            file_handle=tmp_file,
            opt_channel_names=pns_labels
        )
        flow_data = flowio.FlowData(tmp_file)
    else:
        raise ValueError("'fcs_path_or_data' is not a supported type")

    try:
        self.version = flow_data.header['version']
    except KeyError:
        self.version = None

    self.null_channels = null_channel_list
    self.event_count = flow_data.event_count

    # make a temp channels dict, self.channels will be a DataFrame built from it
    tmp_channels = flow_data.channels
    self.pnn_labels = list()
    self.pns_labels = list()
    self.fluoro_indices = list()
    self.scatter_indices = list()
    self.time_index = None

    channel_gain = []
    self.channel_lin_log = []
    channel_range = []

    self.metadata = flow_data.text

    for n in sorted([int(k) for k in tmp_channels.keys()]):
        channel_label = tmp_channels[str(n)]['PnN']
        self.pnn_labels.append(channel_label)

        if 'p%dg' % n in self.metadata:
            channel_gain.append(float(self.metadata['p%dg' % n]))
        else:
            channel_gain.append(1.0)

        # PnR range values are required for all channels
        channel_range.append(float(self.metadata['p%dr' % n]))

        # PnE specifies whether the parameter data is stored on a linear or log scale
        # and includes 2 values: (f1, f2)
        # where:
        #     f1 is the number of log decades (valid values are f1 >= 0)
        #     f2 is the value to use for log(0) (valid values are f2 >= 0)
        # Note for log scale, both values must be > 0
        # linear = (0, 0)
        # log = (f1 > 0, f2 > 0)
        if 'p%de' % n in self.metadata:
            (decades, log0) = [
                float(x) for x in self.metadata['p%de' % n].split(',')
            ]
            if log0 == 0 and decades != 0:
                log0 = 1.0  # FCS std states to use 1.0 for invalid 0 value
            self.channel_lin_log.append((decades, log0))
        else:
            self.channel_lin_log.append((0.0, 0.0))

        if channel_label.lower()[:4] not in ['fsc-', 'ssc-', 'time']:
            self.fluoro_indices.append(n - 1)
        elif channel_label.lower()[:4] in ['fsc-', 'ssc-']:
            self.scatter_indices.append(n - 1)
        elif channel_label.lower() == 'time':
            self.time_index = n - 1

        if 'PnS' in tmp_channels[str(n)]:
            self.pns_labels.append(tmp_channels[str(n)]['PnS'])
        else:
            self.pns_labels.append('')

    self._flowjo_pnn_labels = [
        label.replace('/', '_') for label in self.pnn_labels
    ]

    # build the self.channels DataFrame
    self.channels = pd.DataFrame()
    self.channels['channel_number'] = sorted([int(k) for k in tmp_channels.keys()])
    self.channels['pnn'] = self.pnn_labels
    self.channels['pns'] = self.pns_labels
    self.channels['png'] = channel_gain
    self.channels['pnr'] = channel_range

    # Start processing the event data. First, we'll get the unprocessed events.
    # This is the main entry point for event data into FlowKit, and we ensure
    # the events are double precision because of the pre-processing that will
    # make even integer FCS data types floating point. The precision is needed
    # for accurate gating results.
    tmp_orig_events = np.reshape(
        np.array(flow_data.events, dtype=np.float64),
        (-1, flow_data.channel_count)
    )

    if cache_original_events:
        self._orig_events = tmp_orig_events
    else:
        self._orig_events = None

    # Event data must be scaled according to channel gain, as well
    # as corrected for proper lin/log display, and the time channel
    # scaled by the 'timestep' keyword value (if present).
    # This is the only pre-processing we will do on raw events
    raw_events = copy.deepcopy(tmp_orig_events)

    # Note: The time channel is scaled by the timestep (if present),
    #       but should not be scaled by any gain value present in PnG.
    #       It seems common for cytometers to include a gain value for the
    #       time channel that matches the fluoro channels. Not sure why
    #       they do this but it makes no sense to have an amplifier gain
    #       on the time data. Here, we set any time gain to 1.0.
    if self.time_index is not None:
        channel_gain[self.time_index] = 1.0

    if 'timestep' in self.metadata and self.time_index is not None:
        time_step = float(self.metadata['timestep'])
        raw_events[:, self.time_index] = raw_events[:, self.time_index] * time_step

    for i, (decades, log0) in enumerate(self.channel_lin_log):
        if decades > 0:
            raw_events[:, i] = (10 ** (decades * raw_events[:, i] / channel_range[i])) * log0

    self._raw_events = raw_events / channel_gain
    self._comp_events = None
    self._transformed_events = None
    self.compensation = None
    self.transform = None
    self._subsample_count = None
    self._subsample_seed = None
    self._include_scatter_option = False  # stores user option from transform_events method

    if compensation is not None:
        self.apply_compensation(compensation)

    # if filtering any events, save those in case they want to be retrieved
    self.negative_scatter_indices = None
    self.flagged_indices = None
    self.subsample_indices = None

    try:
        self.acquisition_date = self.metadata['date']
    except KeyError:
        self.acquisition_date = None

    # TODO: Allow user to set some sort of Sample ID or the orig filename,
    #       would be useful for Samples created from data arrays or if
    #       2 FCS files had the same file name.
    try:
        self.original_filename = self.metadata['fil']
    except KeyError:
        if isinstance(fcs_path_or_data, str):
            self.original_filename = os.path.basename(fcs_path_or_data)
        else:
            self.original_filename = None

    # finally, store initial sub-sampled event indices
    self.subsample_events(subsample)
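# Usage sketch (illustrative): constructing a Sample from a pandas DataFrame whose
# MultiIndex columns carry (PnN, PnS) labels; all values and labels below are made up.
#
#     cols = pd.MultiIndex.from_tuples([('FL1-A', 'CD3'), ('FL2-A', 'CD19')])
#     df = pd.DataFrame(np.random.random((100, 2)) * 1024, columns=cols)
#     sample = Sample(df, cache_original_events=True, subsample=50)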