def test_newstyle_date_file():
    """Cut a section out of CUT_BREATH_SECTION and check the extracted breaths.

    ``cut_breath_section`` is called with bounds 1 and 100 and a date stamp;
    the result must yield exactly 99 breaths, each with ``rel_bn`` in [1, 100].
    """
    # Materialize the breaths while the descriptor is still open, then close
    # it deterministically instead of leaking the handle.
    with open(CUT_BREATH_SECTION, encoding='ascii', errors='ignore') as desc:
        new_desc = cut_breath_section(desc, 1, 100, '2018-10-17-13-15-45.844796')
        gen = list(extract_raw(new_desc, False))
    assert len(gen) == 99, len(gen)
    for b in gen:
        assert 1 <= b['rel_bn'] <= 100
def test_failing_abs_bs():
    """Every breath extracted from FAILING_ABS_BS must have a non-None ``abs_bs``.

    Also asserts that at least one breath was produced, so an empty result
    cannot pass vacuously.
    """
    has_breaths = False
    # Keep the file open only for the duration of the extraction.
    with open(FAILING_ABS_BS) as f:
        for b in extract_raw(f, False):
            has_breaths = True
            assert b['abs_bs'] is not None
    assert has_breaths
def test_bad_unicode_error_fails_with_no_encoding():
    """Opening BAD_UNICODE_ERROR in binary mode must not yield any breath.

    A ``BadDescriptorError`` raised by ``extract_raw`` is the accepted outcome;
    any breath that is actually yielded fails the test.
    """
    with open(BAD_UNICODE_ERROR, 'rb') as f:
        try:
            # The call itself sits inside the ``try`` so that the error is
            # tolerated whether extract_raw raises eagerly or lazily.
            gen = extract_raw(f, False)
            for b in gen:
                assert False, "no breath should be yielded from a binary descriptor"
        except BadDescriptorError:
            pass
def test_raw_utils_3_columns():
    """``extract_raw`` must produce at least one breath from the 3-column file.

    The whole file is consumed, so a parse error anywhere in it still
    surfaces as a test failure.
    """
    with open(RAW_UTILS_3_COLUMNS_TEST) as f:
        breaths = list(extract_raw(f, False))
    assert breaths
def _get_file_breath_meta(func, file, tve_pos, ignore_missing_bes,
                          rel_bn_interval, vent_bn_interval, to_data_frame,
                          spec_vent_bns, spec_rel_bns):
    """Apply ``func`` to every breath of ``file`` and collect the results.

    :param func: callable taking one breath and returning one metadata row.
        If its ``__name__`` contains "experimental" the experimental header
        is used, otherwise the regular ``META_HEADER``.
    :param file: an open file-like object, or a path string which is opened
        (and closed again) here.
    :param tve_pos: unused by this function; kept for interface compatibility.
    :param ignore_missing_bes: forwarded positionally to ``extract_raw``.
    :param rel_bn_interval: forwarded to ``extract_raw``.
    :param vent_bn_interval: forwarded to ``extract_raw``.
    :param to_data_frame: when truthy return a ``pd.DataFrame`` whose columns
        are the header row; otherwise return the list (header first).
    :param spec_vent_bns: forwarded to ``extract_raw``.
    :param spec_rel_bns: forwarded to ``extract_raw``.
    :returns: list of rows or DataFrame, see ``to_data_frame``.

    Processing stops early (keeping the rows gathered so far) once a breath
    reports more than 1000 missing BE markers AND those make up more than
    80% of the BS markers counted.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file)

    if "experimental" in func.__name__:
        array = [EXPERIMENTAL_META_HEADER]
    else:
        array = [META_HEADER]

    missing_be_count_threshold = 1000
    missing_be_ratio_threshold = 0.8
    try:
        for breath in extract_raw(file,
                                  ignore_missing_bes,
                                  rel_bn_interval=rel_bn_interval,
                                  vent_bn_interval=vent_bn_interval,
                                  spec_vent_bns=spec_vent_bns,
                                  spec_rel_bns=spec_rel_bns):
            bs_count = breath['bs_count']
            be_count = breath['be_count']
            missing_be = bs_count - be_count
            if (missing_be > missing_be_count_threshold) and (
                    missing_be / float(bs_count) > missing_be_ratio_threshold):
                # Stop here but fall through to the shared return path so
                # that ``to_data_frame`` is honoured on early exit as well.
                break
            array.append(func(breath))
    finally:
        # Only close handles this function created; a caller-supplied
        # descriptor stays under the caller's control.
        if opened_here:
            file.close()

    if not to_data_frame:
        return array
    return pd.DataFrame(array[1:], columns=array[0])
def main():
    """Print a table of the breaths in which ``check_if_plat_occurs`` is true.

    Command line: ``file`` (positional), ``--min-time`` (float, default 0.5)
    and ``--flow-bound`` (float, default 0.2). Both options are forwarded to
    ``check_if_plat_occurs``. If no breath qualifies, a message naming the
    parameters used is printed instead of the table.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('file')
    parser.add_argument('--min-time', default=0.5, type=float)
    parser.add_argument('--flow-bound', default=0.2, type=float)
    args = parser.parse_args()

    table = PrettyTable()
    table.field_names = ['rel_bn', 'abs_bs']
    # Count rows ourselves rather than reading PrettyTable's private ``_rows``.
    n_plats = 0
    with open(args.file, errors='ignore', encoding='ascii') as f:
        for br in extract_raw(f, False):
            is_plat = check_if_plat_occurs(br['flow'],
                                           br['pressure'],
                                           br['dt'],
                                           min_time=args.min_time,
                                           flow_bound=args.flow_bound)
            if is_plat:
                table.add_row([br['rel_bn'], br['abs_bs']])
                n_plats += 1

    if n_plats > 0:
        print(table)
    else:
        print('No plats found using min_time: {} flow_bound: {}'.format(
            args.min_time, args.flow_bound))
def test_raw_utils2():
    """``extract_raw`` must yield at least one breath from RAW_UTILS_TEST2."""
    with open(RAW_UTILS_TEST2) as f:
        # ``any`` stops at the first breath, like the original early ``break``.
        has_breaths = any(True for _ in extract_raw(f, False))
    assert has_breaths
def test_malformed_breath_non_captured():
    """
    With the second ``extract_raw`` argument set to True, MALFORMED_BREATH
    must yield no breaths.

    Ostensibly this would be because there is no BE
    """
    with open(MALFORMED_BREATH) as f:
        breaths = list(extract_raw(f, True))
    assert not breaths
def test_bad_unicode_error():
    """BAD_UNICODE_ERROR must yield a breath when opened as ASCII, ignoring errors."""
    with open(BAD_UNICODE_ERROR, encoding='ascii', errors='ignore') as f:
        # Only the first breath is needed, so stop as soon as one appears.
        has_breaths = any(True for _ in extract_raw(f, False))
    assert has_breaths
def test_extract_raw_ensure_no_empty_rows():
    """Every breath from RAW_UTILS_TEST must have a non-empty ``flow``.

    Also requires that at least one breath was produced.
    """
    has_data = False
    with open(RAW_UTILS_TEST) as f:
        for sec in extract_raw(f, False):
            has_data = True
            assert sec['flow']
    assert has_data
def test_bad_unicode_error_fails_with_no_encoding():
    """Opening BAD_UNICODE_ERROR in binary mode must not yield any breath.

    ``BadDescriptorError`` is the accepted outcome; a yielded breath fails
    the test.
    """
    # this test fails on python 2.7 but succeeds on python3.7. Could this be because of
    # differences in how open works between the two versions?
    with open(BAD_UNICODE_ERROR, 'rb') as f:
        try:
            gen = extract_raw(f, False)
            for b in gen:
                assert False
        except BadDescriptorError:
            pass
def test_extract_raw_with_spec_rel_bns():
    """``spec_rel_bns`` must restrict the output to the requested ``rel_bn`` values.

    Requests relative breath numbers 2, 3, 5, 7 and 9 and checks that every
    breath returned is one of them, and that at least one is returned.
    """
    expected = (2, 3, 5, 7, 9)
    has_data = False
    with open(RAW_UTILS_TEST) as f:
        # Pass a fresh list so the membership check below never depends on
        # what extract_raw does with its argument.
        for breath in extract_raw(f, False, spec_rel_bns=list(expected)):
            has_data = True
            assert breath['rel_bn'] in expected, breath['rel_bn']
    assert has_data
def test_ensure_things_not_double_counter():
    """Consecutive breaths must never share the same ``vent_bn``.

    Extraction is limited with ``vent_bn_interval=[65427, 65428]`` and must
    return at least one breath.
    """
    previous_vent_bn = None
    has_data = False
    with open(RAW_UTILS_TEST) as f:
        for sec in extract_raw(f, False, vent_bn_interval=[65427, 65428]):
            assert sec['vent_bn'] != previous_vent_bn
            has_data = True
            previous_vent_bn = sec['vent_bn']
    assert has_data
def test_raw_utils_with_spec_rel_and_vent_bns3():
    """Requesting ``spec_rel_bns=[1]`` with ``spec_vent_bns=[500]`` must yield nothing."""
    with open(JIMMY_TEST) as f:
        gen = extract_raw(f, False, spec_rel_bns=[1], spec_vent_bns=[500])
        # Stops at the first breath, mirroring the original early ``break``.
        has_breaths = any(True for _ in gen)
    assert not has_breaths
def test_extract_raw_with_interval():
    """``vent_bn_interval`` must restrict the output to the given ``vent_bn`` values.

    Every breath returned must have ``vent_bn`` 65427 or 65428 and a
    ``bs_time`` different from 0.02; at least one breath must be returned.
    """
    has_data = False
    with open(RAW_UTILS_TEST) as f:
        for sec in extract_raw(f, False, vent_bn_interval=[65427, 65428]):
            has_data = True
            # Report the offending value from ``sec``; the previous message
            # referenced an undefined name and raised NameError on failure.
            assert sec['vent_bn'] in [65427, 65428], sec['vent_bn']
            # Ensure that bs_time doesn't start at 0.02
            assert sec['bs_time'] != 0.02
    assert has_data
def test_preprocessed_files_work_with_breath_meta(self):
    """Production breath meta must match between raw and preprocessed input.

    RAW_UTILS_TEST2 is written out through ``process_breath_file``, read back
    with ``read_processed_file`` and, breath by breath, compared against the
    meta computed directly from ``extract_raw``.
    """
    raw_proc = 'tmp.test.raw.npy'
    proc_proc = 'tmp.test.processed.npy'
    with open(RAW_UTILS_TEST2) as f:
        process_breath_file(f, False, 'tmp.test')
    try:
        gen_processed = list(read_processed_file(raw_proc, proc_proc))
    finally:
        # Remove the temporary files even when reading them back fails.
        os.remove(raw_proc)
        os.remove(proc_proc)

    with open(RAW_UTILS_TEST2) as f:
        for i, breath in enumerate(extract_raw(f, False)):
            bm_orig = get_production_breath_meta(breath)
            bm_new = get_production_breath_meta(gen_processed[i])
            assert_list_equal(bm_orig, bm_new)
def main():
    """Convert a raw breath file into a ``<basename>_wt.csv`` file.

    The output goes to ``config.DATA_DIR`` next to this module. Each row is
    ``[time, flow, pressure, 0]`` with values formatted to two decimals.

    * Breaths that carry timestamps (``breath['ts']``) get absolute times in
      milliseconds since the epoch, spaced 20 ms apart.
    * Breaths without timestamps get relative times in seconds, starting at
      0.02 and advancing by ``breath['frame_dur']`` per breath.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file')
    args = parser.parse_args()

    base = splitext(basename(args.input_file))[0]
    filename = os.path.join(os.path.dirname(__file__), config.DATA_DIR,
                            base + '_wt.csv')

    # 20 corresponds with 20 milliseconds or .02 seconds
    sample_ms = 20

    # NOTE(review): the "rbU"/'wb' modes are kept exactly as they were; they
    # look like Python 2 csv conventions - confirm the interpreter this
    # script targets before changing them.
    with open(args.input_file, "rbU") as in_file:
        generator = extract_raw(in_file, False)
        with open(filename, 'wb') as f:
            a = csv.writer(f, delimiter=',')
            # The relative clock lives in its own variable so that a breath
            # with a timestamp can no longer overwrite it with a datetime
            # (which made a later timestamp-less breath raise TypeError).
            rel_time = 0.02
            for breath in generator:
                if len(breath['ts']) != 0:
                    abs_bs = datetime.datetime.strptime(
                        breath['ts'][0], "%Y-%m-%d %H-%M-%S.%f")
                    t = time.mktime(
                        abs_bs.timetuple()) * 1e3 + abs_bs.microsecond / 1e3
                    for i, obs in enumerate(breath['flow']):
                        # NOTE(review): the (i - 1) offset puts the first
                        # sample one step before the timestamp; preserved
                        # as-is - confirm that this is intended.
                        a.writerow([
                            "{:0.2f}".format(
                                round(t + (sample_ms * (i - 1)), 2)),
                            "{:0.2f}".format(obs),
                            " {:0.2f}".format(breath['pressure'][i]), 0
                        ])
                else:
                    # we can only use rel time in this case
                    for i, obs in enumerate(breath['flow']):
                        a.writerow([
                            "{:0.2f}".format(round(rel_time + (.02 * i), 2)),
                            "{:0.2f}".format(obs),
                            " {:0.2f}".format(breath['pressure'][i]), 0
                        ])
                    rel_time = rel_time + breath['frame_dur']
def test_read_processed_file():
    """Breaths read back by ``read_processed_file`` must equal the raw ones.

    For every breath from ``extract_raw`` the fields rel_bn, vent_bn, flow,
    pressure, abs_bs, bs_time, frame_dur and dt are compared against the
    breath at the same position in the processed output.
    """
    out_raw = 'tmp.test.raw.npy'
    out_proc = 'tmp.test.processed.npy'
    compared_keys = ('rel_bn', 'vent_bn', 'flow', 'pressure', 'abs_bs',
                     'bs_time', 'frame_dur', 'dt')

    with open(RAW_UTILS_TEST2) as f:
        gen = list(extract_raw(f, False))
    with open(RAW_UTILS_TEST2) as f:
        process_breath_file(f, False, 'tmp.test')

    try:
        compr = list(read_processed_file(out_raw, out_proc))
        for idx, breath in enumerate(gen):
            orig = {key: breath[key] for key in compared_keys}
            assert_dict_equal(orig, compr[idx])
    finally:
        # Clean up the temporary files even when an assertion fails.
        os.remove(out_raw)
        os.remove(out_proc)
def main():
    """Rewrite a breath file in place with reformatted timestamps.

    Each breath is written as: its first timestamp rendered with
    ``IN_DATETIME_FORMAT``, a ``BS, S:<vent_bn>,`` line, one
    ``<flow>, <pressure>`` line per sample (rounded to 2 decimals) and a
    closing ``BE`` line. The text is written to ``<file>.conv``, which is
    then renamed over the original file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('file')
    args = parser.parse_args()

    ts_format = '%Y-%m-%d %H:%M:%S.%f'
    # Collect pieces and join once instead of repeated string concatenation.
    pieces = []
    # The input is closed before the rename below replaces it.
    with open(args.file) as in_file:
        for breath in extract_raw(in_file, False):
            stamp = breath['ts'][0]
            try:
                dt = datetime.strptime(stamp, ts_format)
            except ValueError:
                # Retry with the last three characters dropped, e.g. when
                # the fractional part is too long for %f.
                dt = datetime.strptime(stamp[:-3], ts_format)
            pieces.append(dt.strftime(IN_DATETIME_FORMAT) + '\n')
            pieces.append("BS, S:{},".format(breath['vent_bn']) + '\n')
            for i, val in enumerate(breath['flow']):
                pieces.append('{}, {}'.format(
                    round(val, 2), round(breath['pressure'][i], 2)) + '\n')
            pieces.append('BE' + '\n')

    with open(args.file + '.conv', 'w') as f:
        # NOTE(review): ``unicode`` is kept from the original; it must be
        # provided by the surrounding module on Python 3 - confirm.
        f.write(unicode("".join(pieces)))
    os.rename(args.file + '.conv', args.file)
def _get_file_breath_meta(func, file, tve_pos, ignore_missing_bes,
                          rel_bn_interval, vent_bn_interval, to_data_frame,
                          spec_vent_bns, spec_rel_bns):
    """Apply ``func`` to every breath of ``file`` and collect the results.

    :param func: callable taking one breath and returning one metadata row.
        If its ``__name__`` contains "experimental" the experimental header
        is used, otherwise the regular ``META_HEADER``.
    :param file: an open file-like object, or a path string which is opened
        (and closed again) here.
    :param tve_pos: unused by this function; kept for interface compatibility.
    :param ignore_missing_bes: forwarded positionally to ``extract_raw``.
    :param rel_bn_interval: forwarded to ``extract_raw``.
    :param vent_bn_interval: forwarded to ``extract_raw``.
    :param to_data_frame: when truthy return a ``pd.DataFrame`` whose columns
        are the header row; otherwise return the list (header first).
    :param spec_vent_bns: forwarded to ``extract_raw``.
    :param spec_rel_bns: forwarded to ``extract_raw``.
    :returns: list of rows or DataFrame, see ``to_data_frame``.
    """
    opened_here = isinstance(file, str)
    if opened_here:
        file = open(file)

    if "experimental" in func.__name__:
        array = [EXPERIMENTAL_META_HEADER]
    else:
        array = [META_HEADER]

    # XXX add logic for accepting a raw_utils array
    try:
        for breath in extract_raw(file,
                                  ignore_missing_bes,
                                  rel_bn_interval=rel_bn_interval,
                                  vent_bn_interval=vent_bn_interval,
                                  spec_vent_bns=spec_vent_bns,
                                  spec_rel_bns=spec_rel_bns):
            array.append(func(breath))
    finally:
        # Only close handles this function created; a caller-supplied
        # descriptor stays under the caller's control.
        if opened_here:
            file.close()

    if not to_data_frame:
        return array
    return pd.DataFrame(array[1:], columns=array[0])
def test_raw_utils_3_columns():
    """``extract_raw`` must produce exactly 61 breaths from the 3-column file."""
    with open(RAW_UTILS_3_COLUMNS_TEST) as f:
        # ``list`` makes the length check work whether extract_raw hands back
        # a list or a lazy generator.
        breaths = list(extract_raw(f, False))
    assert len(breaths) == 61, len(breaths)
def do_basket_function(n_regions, feature_func, gold_stnd_func):
    """Build a per-breath feature DataFrame from the gold standard files.

    :param n_regions: number of regions handed to ``iter_baskets_contiguous``;
        also the minimum number of pressure samples a breath needs.
    :param feature_func: callable returning ``(row, colnames)`` for one breath.
    :param gold_stnd_func: zero-argument callable returning
        ``(observations, files, target_vector)``.
    :returns: DataFrame with one row per breath, a ``y`` column holding
        ``target_vector`` and an index of the form
        ``<observation index>-<patient>-<bs_time>``. The ``bs_time`` column is
        dropped and rows containing inf/NaN are removed.
    """
    observations, files, target_vector = gold_stnd_func()
    rows = []
    for path in files:
        # The patient id is the name of the directory that holds the file.
        patient = path.split("/")[-2]
        pt_observations = observations[observations['patient'] == patient]
        # The raw handle stays open for the whole per-file pass, in case the
        # helpers below read from it lazily, and is closed afterwards.
        with open(path, encoding='ascii', errors='ignore') as raw_file:
            f = clear_descriptor_null_bytes(raw_file)
            selection = cut_breath_section(f, pt_observations['BN'].min(),
                                           pt_observations['BN'].max())
            all_metadata = get_file_experimental_breath_meta(selection)
            all_metadata = pd.DataFrame(all_metadata[1:],
                                        columns=all_metadata[0])
            # implement rounding
            all_metadata = all_metadata.round(2)
            all_metadata = all_metadata.round({"tvi": 1, "tve": 1})
            # Rewind so the same selection can be parsed a second time.
            selection.seek(0)
            prev_prev_metadata = None
            prev_metadata = None
            for idx, breath in enumerate(extract_raw(selection, True)):
                metadata = all_metadata.loc[idx]
                # 28 is the index for x0. Overall this is a little hacky and
                # we should consider either adding a relative x0 time or an x01 index
                # to the metadata
                x01_index = metadata[28]
                i_pressure = breath['pressure'][:x01_index - 1] if x01_index != 0 else breath[
                    'pressure']
                bs_time = metadata[2]
                if len(i_pressure) < n_regions:
                    # XXX this branch is unused and broken: ``df`` is only
                    # bound after the loops, so reaching this line raises
                    # UnboundLocalError.
                    row = [0] * df.shape[1]
                else:
                    row, colnames = feature_func(
                        iter_baskets_contiguous(i_pressure, n_regions),
                        i_pressure, metadata, prev_metadata,
                        prev_prev_metadata)
                    row = row + [
                        bs_time, patient, breath["vent_bn"], breath['rel_bn'],
                        path
                    ]
                rows.append(row)
                if prev_metadata is not None:
                    prev_prev_metadata = prev_metadata.copy()
                prev_metadata = metadata.copy()

    df = pd.DataFrame(rows,
                      columns=colnames +
                      ['bs_time', "patient", "vent_bn", 'rel_bn', "filename"])
    # Final processing on the DF
    df.index = observations.index
    df['y'] = target_vector
    # should be <idx>-<patient id>-<bs time>
    df.index = [
        "{}-{}-{}".format(i, observations.iloc[idx]['patient'],
                          df.iloc[idx]["bs_time"])
        for idx, i in enumerate(df.index)
    ]
    del df['bs_time']
    df = df.replace([np.inf, -np.inf], np.nan).dropna()
    return df
def test_that_we_get_breath_at_end_when_no_skip_be():
    """The last breath of BE_NOT_AT_END must still be returned.

    Expects the final breath to have ``vent_bn`` 14635, ``rel_bn`` 14 and
    13 flow samples.
    """
    f = open_func(BE_NOT_AT_END)
    try:
        # ``list`` allows indexing whether extract_raw returns a list or a
        # generator.
        breaths = list(extract_raw(f, False))
    finally:
        f.close()
    last = breaths[-1]
    assert last['vent_bn'] == 14635
    assert last['rel_bn'] == 14
    assert len(last['flow']) == 13
import io
import csv
import argparse

from ventmap.breath_meta import get_production_breath_meta
from ventmap.raw_utils import extract_raw
from ventmap.constants import META_HEADER

# Command line: input ventMAP csv, output csv and the input sample rate.
parser = argparse.ArgumentParser(description="Convert ventMAP csv to rapidalarm csv")
parser.add_argument('infile', type=str, help="path to input ventMAP csv")
parser.add_argument('outfile', type=str, help="path to output csv")
# The default is an int, so parse user-supplied values as int too; with
# ``type=str`` the value changed type depending on whether -r was passed.
parser.add_argument('-r', type=int, default=50, help="input file samplerate")
args = parser.parse_args()

generator = extract_raw(io.open(args.infile), False)

# pressure, flow, PIP, PEEP, RR waveforms
pressure = []
flow = []
pip = []
peep = []
rr = []

# read each breath waveform and precomputed scalar metrics
for breath in generator:
    # load single breath
    prod_breath_meta = get_production_breath_meta(breath)

    # get breath pressure and ground truth PIP, PEEP, RR
    pressure += breath['pressure']
def test_to_series_works(self):
    """``get_production_breath_meta(..., to_series=True)`` must return a ``pd.Series``.

    Checked for every breath extracted from RAW_UTILS_TEST2.
    """
    with open(RAW_UTILS_TEST2) as f:
        for breath in extract_raw(f, False):
            bm_orig = get_production_breath_meta(breath, to_series=True)
            assert isinstance(bm_orig, pd.Series)
def write_base_file_breath_meta(self):
    """Write one csv row of breath metadata per breath in ``self.base_filename``.

    Rows go to ``self.apfile``. Each row combines values read from the
    production meta (positions 6, 7, 9, 10, 17, 35), the last experimental
    meta value, two slope values and a flow median, plus a running relative
    time that starts at 0.02 and advances by ``breath['frame_dur']``.

    Raises ``Exception`` when a row's length differs from ``self.columns``.
    """
    ts_in_format = "%Y-%m-%d %H-%M-%S.%f"
    ts_out_format = "%Y-%m-%d %H:%M:%S.%f"
    previous_peep = 'N/A'
    elapsed = 0.02
    with open(self.base_filename, 'rU') as out, open(self.apfile, 'wt') as ap:
        breaths = extract_raw(out, False)
        writer = csv.writer(ap, delimiter=',', quoting=csv.QUOTE_NONE)
        for breath in breaths:
            meta = get_production_breath_meta(breath)
            meta_exp = get_experimental_breath_meta(breath)

            # set datetime format
            if len(breath['ts']) == 0:
                abs_bs_time = ""
            else:
                parsed = datetime.strptime(breath['ts'][0], ts_in_format)
                abs_bs_time = parsed.strftime(ts_out_format)

            tvi = round(meta[9], 1)
            tve = round(meta[10], 1)
            try:
                tv_ratio = round(abs(float(tve / tvi)), 2)
            except ZeroDivisionError:
                tv_ratio = 'inf'

            itime = round(meta[6], 2)
            etime = round(meta[7], 2)
            min_pressure = round(meta[35], 2)
            peep = round(meta[17], 2)

            fbit = meta[6]
            pbit = meta_exp[-1]
            fbit_pbit = fbit / pbit
            slope_dyna = cal_slope_dyna(breath)
            slope_static = cal_slope_static(breath)
            flow_median = median_flow_dyna(breath)

            # The only trick is that these columns must match with the ordering
            # in self.columns
            identifiers = [breath['rel_bn'], breath['vent_bn'], elapsed, abs_bs_time]
            volumes_and_times = [tvi, tve, tv_ratio, etime, itime]
            pressures = [peep, previous_peep, min_pressure]
            derived = [
                round(pbit, 2),
                round(fbit_pbit, 2),
                round(slope_dyna, 2),
                round(slope_static, 2),
                round(flow_median, 2),
            ]
            vals = identifiers + volumes_and_times + pressures + derived

            if len(vals) != len(self.columns):
                raise Exception(
                    'number of columns does not match number of values trying to be written!'
                )
            writer.writerow(vals)

            previous_peep = peep
            elapsed = round(elapsed + breath['frame_dur'], 2)
def test_malformed_breath_is_captured():
    """With the second ``extract_raw`` argument False, MALFORMED_BREATH must yield breaths."""
    with open(MALFORMED_BREATH) as f:
        breaths = list(extract_raw(f, False))
    assert breaths
def test_extract_raw_sunny_day():
    """``extract_raw`` must return at least one breath for ARDS_AND_COPD."""
    # To ensure everything is ok
    with open(ARDS_AND_COPD) as f:
        breaths = list(extract_raw(f, False))
    assert breaths