示例#1
0
    def __init__(self, descriptor):
        """
        Store the descriptor, zero the breath/time bookkeeping and detect the
        column layout from the first line of the file.

        :param descriptor: The file descriptor to use
        :raises ValueError: if the descriptor is not one of the accepted
            stream types
        :raises BadDescriptorError: if clearing null bytes from the
            descriptor raises UnicodeDecodeError
        """
        self.descriptor = descriptor
        stream = self.descriptor
        # Positive form of the type guard; the checks run in the same order
        # and short-circuit on the first one that matches.
        is_supported = (
            isinstance(stream, StringIO)
            or "cStringIO" in str(stream.__class__)
            or isinstance(stream, io.TextIOWrapper)
            or isinstance(stream, io.BufferedReader)
        )
        if not is_supported:
            raise ValueError(
                "Provide a file descriptor as input! Make sure you are using a Python3 compatible descriptor such as io.open."
            )

        # Breath counters and time bookkeeping all start out empty.
        self.rel_bn = 0
        self.vent_bn = 0
        self.rel_bs_time = 0
        self.abs_bs_time = None
        self.cur_abs_time = None

        try:
            cleaned = clear_descriptor_null_bytes(stream)
        except UnicodeDecodeError:
            raise BadDescriptorError(
                'You seem to have opened a file with garbled bytes. you should open it using io.open(file, encoding="ascii", errors="ignore"'
            )
        self.descriptor = cleaned

        # Peek at the first line to learn the layout, then rewind so later
        # reads start from the top of the file.
        self.descriptor.seek(0)
        header = self.descriptor.readline()
        layout = detect_version_v2(header)
        self.bs_col, self.ncol, self.ts_1st_col, self.ts_1st_row = layout
        self.descriptor.seek(0)
示例#2
0
def gather_flow_and_pressure(f_desc):
    """
    Gather time, flow, and pressure data given some file descriptor of a b2c
    data file.

    A row whose marker column reads "BS" opens a breath and a row whose
    marker column reads "BE" closes it. Samples are only stored while a
    breath is open. Time advances by 0.02 for every sample row seen after the
    first "BS" and is never reset between breaths.

    Returns data as a list of breaths, each breath being a list of tuples

    [
        [(t1, flow1, pressure1), (t2, flow2, pressure2), ...],
        [(tk, flowk, pressurek), ...],
        ...
    ]

    :param f_desc: seekable file descriptor of the data file
    :raises ValueError: if a sample row after the first "BS" holds a value
        that cannot be parsed as a float
    """
    f_desc.seek(0)
    bs_col = detect_version_v2(f_desc.readline())[0]
    f_desc.seek(0)

    data = []
    points = None  # None while no breath is open
    started = False  # becomes True at the first "BS" row
    t = 0
    delta = 0.02
    for line in csv.reader(f_desc):
        if len(line) <= bs_col:
            # Blank or truncated row: there is no marker column to inspect.
            continue
        marker = line[bs_col].strip()
        if marker == "BS":
            started = True
            points = []
        elif marker == "BE":
            # A "BE" without an open breath has nothing to close.
            if points is not None:
                data.append(points)
            # Drop the reference so later rows cannot mutate a closed breath.
            points = None
        else:
            if not started:
                # Stray rows ahead of the first breath are ignored.
                continue
            t = t + delta
            sample = (t, float(line[bs_col]), float(line[bs_col + 1]))
            if points is not None:
                points.append(sample)
    return data
示例#3
0
def cut_breath_section(descriptor, bn_start, bn_end, start_abs_bs):
    """
    Cut up a file by relative breath number

    The relative breath number is a running count of "BS" marker rows.
    Recording starts on the first row at which that count equals bn_start and
    stops at the "BE" marker row seen while the count equals bn_end. Blank
    rows are not kept.

    :param descriptor: file  descriptor for file to chunk up
    :param bn_start: starting (inclusive) relative breath number
    :param bn_end: ending (inclusive) relative breath number
    :param start_abs_bs: because this function cuts off the absolute breath start timestamp we can provide a new one for the file if we need. If we dont care we can just provide None
    :returns: a StringIO with the kept lines, preceded by start_abs_bs on its
        own line when one was given
    :raises ValueError: if bn_start or bn_end cannot be converted to int
    :raises Exception: if start_abs_bs does not match the expected format, or
        if the requested section could not be found in the file
    """
    try:
        bn_start = int(bn_start)
        bn_end = int(bn_end)
    except (TypeError, ValueError, OverflowError):
        raise ValueError(
            "Must input bn_start and bn_end as integers! Your input "
            "bn_start: {}, bn_end: {}".format(bn_start, bn_end))
    if start_abs_bs:
        try:
            datetime.strptime(start_abs_bs, '%Y-%m-%d-%H-%M-%S.%f')
        except (TypeError, ValueError):
            raise Exception(
                'start_abs_bs must be in format %Y-%m-%d-%H-%M-%S.%f')

    # Detect the layout from the first line, then rewind for the real pass.
    descriptor.seek(0)
    bs_col = detect_version_v2(descriptor.readline())[0]
    descriptor.seek(0)

    bn = 0  # on APL the relative breath number starts at 1
    record_lines = False
    lines_to_keep = []
    # enumerate() keeps the index advancing on blank rows too, so it stays
    # aligned with the split("\n") line list used below.
    for i, line in enumerate(csv.reader(descriptor)):
        if not line:
            continue
        # A row too short to reach the marker column cannot be a marker.
        marker = line[bs_col].strip() if len(line) > bs_col else ""
        if marker == "BS":
            bn += 1
        if bn == bn_start:
            record_lines = True

        if record_lines:
            lines_to_keep.append(i)

        if bn == bn_end and marker == "BE":
            if not lines_to_keep:
                # The end breath was reached before recording ever started.
                break
            descriptor.seek(0)
            lines = descriptor.read().split("\n")
            # Plain indexing behaves the same for one index as for many.
            text = "\n".join([lines[k] for k in lines_to_keep])
            if start_abs_bs:
                text = start_abs_bs + '\n' + text
            return StringIO(text)

    raise Exception(
        "Something went wrong. The input breath numbers seem to "
        "be incorrect or the file format does not match a raw "
        "ventilator waveform file")
示例#4
0
def cut_breath_section(descriptor, bn_start, bn_end):
    """
    Cut up a file by relative breath number

    The relative breath number is a running count of "BS" marker rows.
    Recording starts on the first row at which that count equals bn_start and
    stops at the "BE" marker row seen while the count equals bn_end. Blank
    rows are not kept.

    :param descriptor: file descriptor for file to chunk up
    :param bn_start: starting (inclusive) relative breath number
    :param bn_end: ending (inclusive) relative breath number
    :returns: a StringIO with the kept lines
    :raises ValueError: if bn_start or bn_end cannot be converted to int
    :raises Exception: if the requested section could not be found in the file
    """
    try:
        bn_start = int(bn_start)
        bn_end = int(bn_end)
    except (TypeError, ValueError, OverflowError):
        raise ValueError(
            "Must input bn_start and bn_end as integers! Your input "
            "bn_start: {}, bn_end: {}".format(bn_start, bn_end))

    # Detect the layout from the first line, then rewind for the real pass.
    descriptor.seek(0)
    bs_col = detect_version_v2(descriptor.readline())[0]
    descriptor.seek(0)

    bn = 0  # on APL the relative breath number starts at 1
    record_lines = False
    lines_to_keep = []
    # enumerate() keeps the index advancing on blank rows too, so it stays
    # aligned with the split("\n") line list used below.
    for i, line in enumerate(csv.reader(descriptor)):
        if not line:
            # csv yields [] for a blank line; there is no marker to read.
            continue
        # A row too short to reach the marker column cannot be a marker.
        marker = line[bs_col].strip() if len(line) > bs_col else ""
        if marker == "BS":
            bn += 1
        if bn == bn_start:
            record_lines = True

        if record_lines:
            lines_to_keep.append(i)

        if bn == bn_end and marker == "BE":
            if not lines_to_keep:
                # The end breath was reached before recording ever started.
                break
            descriptor.seek(0)
            lines = descriptor.read().split("\n")
            # Plain indexing behaves the same for one index as for many.
            return StringIO("\n".join([lines[k] for k in lines_to_keep]))

    raise Exception(
        "Something went wrong. The input breath numbers seem to "
        "be incorrect or the file format does not match a raw "
        "ventilator waveform file")
示例#5
0
def extract_raw(descriptor,
                ignore_missing_bes,
                rel_bn_interval=[],
                vent_bn_interval=[],
                spec_rel_bns=[],
                spec_vent_bns=[]):
    """
    Takes a file descriptor and yields the raw data of each breath for us
    to use. This is a generator; every yielded item has the format

    {
        'rel_bn': rel_bn,
        'vent_bn': vent_bn,
        't': [rel_t1, rel_t2, ...],
        'ts': [ts1, ts2, ...],
        'flow': [flow1, flow2, ...],
        'pressure': [pressure1, pressure2, ...],
        'be_count': be_count,
        'bs_count': bs_count,
        'bs_time': bs_time,
        'frame_dur': frame_dur,
        'dt': t_delta,
    }

    A breath is emitted when its BE row is reached. When ignore_missing_bes
    is False, a breath that is still open when the next BS row arrives is
    emitted as well. Iteration stops at the first BS row whose breath number
    lies past the end of the requested interval / list.

    :param descriptor: The file descriptor to use
    :param ignore_missing_bes: boolean whether or not to ignore missing BEs in the data (False if we want to use breaths without a BE, True otherwise)
    :param rel_bn_interval: The relative [start, end] interval for the data
    :param vent_bn_interval: The vent bn [start, end] interval for the data
    :param spec_rel_bns: The specific relative bns that we want eg: [1, 10, 20]
    :param spec_vent_bns: The specific vent bns that we want eg: [1, 10, 20]
    :raises ValueError: if descriptor is not one of the accepted stream types
    :raises BadDescriptorError: if clearing null bytes from the descriptor
        raises UnicodeDecodeError
    """
    # XXX You could probably save yourself a ton of time if you
    # processed the BS/BE rows to remove their trailing commas.
    # then you could use a method like np.genfromtext or something faster
    # than the native csv lib.
    def get_data(flow, pressure, t_array, ts_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta):
        # Package one breath. Returns None (implicitly) when filter_arrays
        # leaves no flow samples, so callers must check before yielding.
        flow, pressure, t_array, ts_array = filter_arrays(
            flow, pressure, t_array, ts_array
        )
        if flow:
            return {
                "rel_bn": rel_bn,
                "vent_bn": vent_bn,
                "flow": flow,
                "pressure": pressure,
                "t": t_array,
                "ts": ts_array,
                "bs_count": bs_count,
                "be_count": be_count,
                "bs_time": bs_time,
                "frame_dur": round(t_array[-1] + t_delta, 2),
                "dt": t_delta,
            }

    if not  isinstance(descriptor, StringIO) and not "cStringIO" in str(descriptor.__class__) and not isinstance(descriptor, io.TextIOWrapper) and not isinstance(descriptor, io.BufferedReader):
        raise ValueError("Provide a file descriptor as input! Make sure you are using a Python3 compatible descriptor such as io.open.")

    # sorted() returns new lists, so the (mutable) default arguments are
    # never modified by this function.
    spec_rel_bns = sorted(spec_rel_bns)
    spec_vent_bns = sorted(spec_vent_bns)
    collection_start = False
    last_t = 0  # first data point starts at 0
    bs_count = 0
    be_count = 0
    bs_time = 0.02
    t_delta = 0.02
    rel_ts = 0
    vent_bn = 0
    rel_bn = 0
    has_bs = False  # True only while samples of the current breath are collected
    idx = 0
    flow, pressure, t_array, timestamp_array = reset_arrays(None, None, None, None)
    try:
        descriptor = clear_descriptor_null_bytes(descriptor)
    except UnicodeDecodeError:
        raise BadDescriptorError('You seem to have opened a file with garbled bytes. you should open it using io.open(file, encoding="ascii", errors="ignore"')
    reader = csv.reader(descriptor)
    # Raw string so the regex escape is not read as a string escape.
    vent_bn_regex = re.compile(r"S:(\d+)")
    descriptor.seek(0)
    first_line = descriptor.readline()
    bs_col, ncol, ts_1st_col, ts_1st_row = detect_version_v2(first_line)
    if ts_1st_row:
        # abs_time is only bound (and only read below) when ts_1st_row is set.
        abs_time = datetime.strptime(first_line.strip('\r\n'), IN_DATETIME_FORMAT)
    descriptor.seek(0)

    for row in reader:
        # Skip rows too short to have a marker column.
        try:
            row[bs_col]
        except IndexError:
            continue
        if row[bs_col].strip() == "BS":
            collection_start = True
            if not ignore_missing_bes and has_bs:
                # Previous breath never saw its BE: emit it anyway.
                data = get_data(
                    flow, pressure, t_array, timestamp_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta
                )
                if data:
                    yield data
                bs_time = round(last_t + 0.02, 2)
            rel_ts = 0
            bs_count += 1
            rel_bn += 1
            idx = 0
            has_bs = True
            flow, pressure, t_array, timestamp_array = reset_arrays(
                flow, pressure, t_array, timestamp_array
            )
            try:
                match = vent_bn_regex.search(row[bs_col + 1])
            except IndexError:
                has_bs = False
                continue
            if not match:
                has_bs = False  # Don't collect data for the breath
                continue
            vent_bn = int(match.groups()[0])
            # Past the end of the requested range: stop the generator.
            if rel_bn_interval and rel_bn > rel_bn_interval[1]:
                return
            elif vent_bn_interval and vent_bn > vent_bn_interval[1]:
                return
            elif spec_rel_bns and rel_bn > spec_rel_bns[-1]:
                return
            elif spec_vent_bns and vent_bn > spec_vent_bns[-1]:
                return
            # Not selected by the active filter: skip this breath's samples.
            elif vent_bn_interval and not (vent_bn_interval[0] <= vent_bn <= vent_bn_interval[1]):
                has_bs = False
            elif rel_bn_interval and not (rel_bn_interval[0] <= rel_bn <= rel_bn_interval[1]):
                has_bs = False
            elif spec_rel_bns and (rel_bn not in spec_rel_bns):
                has_bs = False
            elif spec_vent_bns and (vent_bn not in spec_vent_bns):
                has_bs = False
        elif row[bs_col].strip() == "BE":
            be_count += 1
            has_bs = False
            data = get_data(
                flow, pressure, t_array, timestamp_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta
            )
            if data:
                yield data
            bs_time = round(last_t + 0.02, 2)
            rel_ts = 0
        else:
            if collection_start:  # if there is stray data at the top of the file
                # make sure data is correctly formed
                try:
                    float(row[ncol - 2])
                    float(row[ncol - 1])
                except (IndexError, ValueError):
                    continue
                # The file clock advances for every well-formed sample,
                # including samples of breaths that are filtered out.
                last_t = round(last_t + .02, 2)

            if not has_bs:
                continue
            try:
                flow[idx] = round(float(row[ncol - 2]), 2)
                pressure[idx] = round(float(row[ncol - 1]), 2)
            except (IndexError, ValueError):
                continue
            t_array[idx] = round(rel_ts, 2)
            if ts_1st_col:
                timestamp_array[idx] = row[0]
            elif ts_1st_row:
                timestamp_array[idx] = (abs_time + timedelta(seconds=last_t)).strftime(OUT_DATETIME_FORMAT)
            rel_ts = round(rel_ts + t_delta, 2)
            idx += 1
示例#6
0
def bs_be_denoting_extractor(descriptor, rel_bn_interval=[]):
    """
    Insert BS/BE markers into a marker-less ventilator file and return the
    breath data generator that extract_raw produces for the marked-up text.

    A breath start is declared when flow reaches a minimum level and has
    risen by a minimum amount over the last few samples, while the detector
    is armed. The detector disarms once a start fires or pressure climbs past
    a cutoff derived from running PEEP / PIP medians. It re-arms when flow
    and pressure both fall back below their limits.

    :param descriptor: A file descriptor for a ventilator data file without
    BS or BE markers.
    :param rel_bn_interval: passed through unchanged to extract_raw
    """
    header = descriptor.readline()
    _, _, _, ts_1st_row = detect_version_v2(header)
    # Pieces of the marked-up file; joined once at the end.
    chunks = []
    if ts_1st_row:
        chunks.append(header)
    else:
        descriptor.seek(0)

    # Detection tunables.
    flow_floor = 10
    flow_rise_min = 5
    flow_lag = 4
    peep_window = 5
    max_lookback = 4
    fallback_lookback = 2
    history_len = 25
    pressure_frac = 0.7

    samples = np.genfromtxt(descriptor, delimiter=',')

    prev_start = None
    cur_start = None
    breath_no = 1
    armed = True
    peep_med = 0
    pip_med = 100
    peep_hist = []
    pip_hist = []

    # pos is the true index into samples; the sample flow_lag rows earlier
    # is the reference point for the flow rise.
    for pos, sample in enumerate(samples[flow_lag:], flow_lag):
        flow_rise = sample[0] - samples[pos - flow_lag, 0]
        pressure_cut = peep_med + (pip_med - peep_med) * pressure_frac

        if sample[1] >= pressure_cut:
            armed = False

        if armed and sample[0] >= flow_floor and flow_rise >= flow_rise_min:
            armed = False

            # Walk backwards looking for the nearest negative-flow sample;
            # the breath starts right after it. If none is found within the
            # lookback window, fall back to a fixed offset.
            for back in range(1, max_lookback + 1):
                probe = pos - back
                if probe < 0 or samples[probe, 0] < 0:
                    found = probe + 1
                    break
            else:
                found = pos - fallback_lookback
            prev_start, cur_start = cur_start, found

            # XXX Current methodology just constructs a basic file descriptor
            # to pass to extract_raw. This is not very efficient, but was
            # easiest to code for evaluation of algorithms. Future engineering
            # can just modify this function to return our canonical data dict
            if prev_start:
                chunks.append(
                    "BS, S:{}\n".format(breath_no) +
                    fmt_as_csv(samples[prev_start:cur_start]) +
                    "\nBE\n"
                )
                breath_no += 1

            if breath_no != 1:
                # Refresh the running PEEP / PIP medians with this breath.
                peep_from = max(cur_start - peep_window, 0)
                peep = np.mean(samples[peep_from:pos, 1])
                pip = np.max(samples[prev_start:cur_start, 1])
                if len(peep_hist) >= history_len:
                    peep_hist.pop(0)
                    pip_hist.pop(0)
                peep_hist.append(peep)
                pip_hist.append(pip)
                peep_med = np.median(peep_hist)
                pip_med = np.median(pip_hist)

        elif not armed and sample[0] < flow_floor and sample[1] < pressure_cut:
            armed = True

    # Whatever follows the last detected start becomes the final breath
    # (the whole array when no start was ever detected).
    chunks.append(
        "BS, S:{}\n".format(breath_no) +
        fmt_as_csv(samples[cur_start:]) +
        "\nBE\n"
    )

    return extract_raw(StringIO("".join(chunks)), False, rel_bn_interval=rel_bn_interval)
示例#7
0
def real_time_extractor(descriptor,
                        ignore_missing_bes,
                        rel_bn_interval=[],
                        vent_bn_interval=[],
                        spec_rel_bns=[],
                        spec_vent_bns=[]):
    """
    The exact same functionality as extract_raw, except this method
    returns a list of breaths and is also able to update timestamp based on
    whether/not a new timestamp is found in file. Both of these functions are
    necessary for real time TOR.

    In future, we might be able to consolidate this function with extract_raw,
    but for now this works fine and there is no need to expend the engineering
    effort

    :param descriptor: The file descriptor to use
    :param ignore_missing_bes: boolean whether or not to ignore missing BEs in the data (False if we want to use breaths without a BE, True otherwise)
    :param rel_bn_interval: The relative [start, end] interval for the data
    :param vent_bn_interval: The vent bn [start, end] interval for the data
    :param spec_rel_bns: The specific relative bns that we want eg: [1, 10, 20]
    :param spec_vent_bns: The specific vent bns that we want eg: [1, 10, 20]
    :returns: list of breath dicts with keys rel_bn, vent_bn, flow, pressure,
        t, ts, bs_count, be_count, bs_time, frame_dur and dt
    :raises ValueError: if descriptor is not one of the accepted stream
        types, or if more than one filtering option is supplied
    :raises Exception: if the first row of the file is not a timestamp
    """
    def get_data(flow, pressure, t_array, ts_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta):
        # Package one breath. Returns None (implicitly) when filter_arrays
        # leaves no flow samples, so callers must check before appending.
        flow, pressure, t_array, ts_array = filter_arrays(
            flow, pressure, t_array, ts_array
        )
        if flow:
            return {
                "rel_bn": rel_bn,
                "vent_bn": vent_bn,
                "flow": flow,
                "pressure": pressure,
                "t": t_array,
                "ts": ts_array,
                "bs_count": bs_count,
                "be_count": be_count,
                "bs_time": bs_time,
                "frame_dur": t_array[-1] + t_delta,
                "dt": t_delta,
            }

    if not isinstance(descriptor, StringIO) and not "cStringIO" in str(descriptor.__class__) and not isinstance(descriptor, io.TextIOWrapper):
        raise ValueError("Provide a file descriptor as input! Make sure you are using a Python3 compatible descriptor such as io.open.")

    # Count the supplied filters directly. A parity (xor) check would let
    # three simultaneous options through.
    n_filters = sum(
        1
        for option in (rel_bn_interval, vent_bn_interval, spec_rel_bns, spec_vent_bns)
        if len(option) > 0
    )
    if n_filters > 1:
        raise ValueError("You can only specify one vent or rel bn filtering option for use!")

    # sorted() returns new lists, so the (mutable) default arguments are
    # never modified by this function.
    spec_rel_bns = sorted(spec_rel_bns)
    spec_vent_bns = sorted(spec_vent_bns)
    collection_start = False
    last_t = 0  # first data point starts at 0
    bs_count = 0
    be_count = 0
    bs_time = 0.02
    t_delta = 0.02
    rel_ts = 0
    vent_bn = 0
    rel_bn = 0
    has_bs = False  # True only while samples of the current breath are collected
    idx = 0
    flow, pressure, t_array, timestamp_array = reset_arrays(None, None, None, None)
    descriptor = clear_descriptor_null_bytes(descriptor)
    reader = csv.reader(descriptor)
    data_list = []
    # Raw strings so the regex escapes are not read as string escapes.
    vent_bn_regex = re.compile(r"S:(\d+)")
    date_search = re.compile(r"^20[12]\d-[01]\d-")
    descriptor.seek(0)
    first_line = descriptor.readline()

    # Should we be more strict and now allow breaths without a TS up top?
    bs_col, ncol, ts_1st_col, ts_1st_row = detect_version_v2(first_line)
    if ts_1st_row:
        abs_time = datetime.strptime(first_line.strip('\r\n'), IN_DATETIME_FORMAT)
        start_time = abs_time
    else:
        raise Exception("A breath timestamp must be on first row!")
    descriptor.seek(0)

    for row in reader:
        # Skip rows too short to have a marker column.
        try:
            row[bs_col]
        except IndexError:
            continue

        # XXX fix bs_time! it is not accurate when we update the timestamp
        #
        # update abs time
        if date_search.search(row[0]):
            # A timestamp row re-anchors the absolute clock and restarts the
            # offset counted from it.
            abs_time = datetime.strptime(row[0], IN_DATETIME_FORMAT)
            last_t = 0
            bs_time = round((abs_time + timedelta(seconds=0.02) - start_time).total_seconds(), 2)
            continue

        if row[bs_col].strip() == "BS":
            collection_start = True
            if not ignore_missing_bes and has_bs:
                # Previous breath never saw its BE: keep it anyway.
                data = get_data(
                    flow, pressure, t_array, timestamp_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta
                )
                if data:
                    data_list.append(data)
                bs_time = round((abs_time + timedelta(seconds=last_t) - start_time).total_seconds(), 2)
            rel_ts = 0
            bs_count += 1
            rel_bn += 1
            idx = 0
            has_bs = True
            flow, pressure, t_array, timestamp_array = reset_arrays(
                flow, pressure, t_array, timestamp_array
            )
            try:
                match = vent_bn_regex.search(row[bs_col + 1])
            except IndexError:
                has_bs = False
                continue
            if not match:
                has_bs = False  # Don't collect data for the breath
                continue
            vent_bn = int(match.groups()[0])
            # Past the end of the requested range: stop reading the file.
            if rel_bn_interval and rel_bn > rel_bn_interval[1]:
                break
            elif vent_bn_interval and vent_bn > vent_bn_interval[1]:
                break
            elif spec_rel_bns and rel_bn > spec_rel_bns[-1]:
                break
            elif spec_vent_bns and vent_bn > spec_vent_bns[-1]:
                break
            # Not selected by the active filter: skip this breath's samples.
            elif vent_bn_interval and not (vent_bn_interval[0] <= vent_bn <= vent_bn_interval[1]):
                has_bs = False
            elif rel_bn_interval and not (rel_bn_interval[0] <= rel_bn <= rel_bn_interval[1]):
                has_bs = False
            elif spec_rel_bns and (rel_bn not in spec_rel_bns):
                has_bs = False
            elif spec_vent_bns and (vent_bn not in spec_vent_bns):
                has_bs = False
        elif row[bs_col].strip() == "BE":
            be_count += 1
            has_bs = False
            data = get_data(
                flow, pressure, t_array, timestamp_array, rel_bn, vent_bn, bs_count, be_count, bs_time, t_delta
            )
            if data:
                data_list.append(data)
            bs_time = round((abs_time + timedelta(seconds=last_t) - start_time).total_seconds(), 2)
            rel_ts = 0
        else:
            if collection_start:  # if there is stray data at the top of the file
                # make sure data is correctly formed
                try:
                    float(row[ncol - 2])
                    float(row[ncol - 1])
                except (IndexError, ValueError):
                    continue
                # The file clock advances for every well-formed sample,
                # including samples of breaths that are filtered out.
                last_t = round(last_t + .02, 2)

            if not has_bs:
                continue
            try:
                flow[idx] = round(float(row[ncol - 2]), 2)
                pressure[idx] = round(float(row[ncol - 1]), 2)
            except (IndexError, ValueError):
                continue
            t_array[idx] = round(rel_ts, 2)
            if ts_1st_col:
                timestamp_array[idx] = row[0]
            elif ts_1st_row:
                timestamp_array[idx] = (abs_time + timedelta(seconds=last_t)).strftime(OUT_DATETIME_FORMAT)
            rel_ts = round(rel_ts + t_delta, 2)
            idx += 1

    return data_list