def admissionDates(fromDate, toDate):
    """Report ``STATUS_IN`` for every step of a date range.

    Both arguments are date strings accepted by ``util.toTime``; an empty
    string means "not given".

    * Both empty: nothing happens.
    * Only one given: that single date is reported.
    * Both given: ``handleStatusEvent(day, STATUS_IN)`` is called for the
      start time and for each value produced by ``util.nextDay`` while it
      is still ``<=`` the end time.
    """
    if fromDate == '' and toDate == '':
        return
    if fromDate == '':
        # Only an end date was supplied: use it as a one-point range.
        fromDate, toDate = toDate, ''

    day = util.toTime(fromDate)
    if toDate != '':
        lastDay = util.toTime(toDate)
    else:
        lastDay = day

    while day <= lastDay:
        handleStatusEvent(day, STATUS_IN)
        day = util.nextDay(day)
def emitLab(date, loinc, result, resultFlag):
    """Append a ``TYPE_LABTEST`` entry for one lab result to ``obj['events']``.

    The entry is built by ``createEntry`` with the id ``"loinc__" + loinc``
    and ``claim_id``; its ``'time'`` is ``util.toTime(date)``.  ``claim_id``
    and ``obj`` are not defined in this block and must come from the
    surrounding scope.
    """
    # TODO add provider/physician here as well?
    labId = "loinc__" + loinc
    # True as soon as either the result or its flag is non-empty.
    hasValue = result != '' or resultFlag != ''
    labEntry = createEntry(
        TYPE_LABTEST, labId, claim_id, hasValue, resultFlag, result)
    labEntry['time'] = util.toTime(date)
    obj['events'].append(labEntry)
def dates(fromDate, toDate):
    """Emit the row's events and status updates for every step of a range.

    ``fromDate`` / ``toDate`` are date strings accepted by ``util.toTime``;
    an empty string means "not given".  With both empty nothing happens;
    with only one given, that single date is processed.

    For each step (start time, then ``util.nextDay`` while ``<=`` the end):

    * every entry yielded by ``handleEvent(row, claim_id)`` gets the
      current time, an optional cost from ``"claim_amount"``, and is
      appended to ``obj['events']``;
    * ``curStatus`` is reported through ``handleStatusEvent``;
    * an optional ``"location_flag"`` is mapped through
      ``STATUS_FLAG_MAP`` (default ``STATUS_UNKNOWN``) and reported too.

    ``row``, ``claim_id``, ``obj`` and ``curStatus`` are not defined in
    this block and must come from the surrounding scope.
    """
    if fromDate == '':
        if toDate == '':
            return
        # Only an end date was supplied: use it as a one-point range.
        fromDate, toDate = toDate, ''

    day = util.toTime(fromDate)
    if toDate != '':
        lastDay = util.toTime(toDate)
    else:
        lastDay = day

    while day <= lastDay:
        for entry in handleEvent(row, claim_id):
            entry['time'] = day
            handleKey(
                row, "claim_amount", MODE_OPTIONAL,
                lambda amount: addCost(entry, amount))
            obj['events'].append(entry)

        handleStatusEvent(day, curStatus)
        handleKey(
            row, "location_flag", MODE_OPTIONAL,
            lambda flag: handleStatusEvent(
                day, STATUS_FLAG_MAP.get(flag, STATUS_UNKNOWN)))

        day = util.nextDay(day)
def dates(fromDate, toDate):
    """Process the current row once per step of the given date range.

    Arguments are date strings for ``util.toTime``; ``''`` means absent.
    If both are absent the function returns immediately.  If only one is
    present it is used as both start and end.

    On every step the entries from ``handleEvent(row, claim_id)`` are
    time-stamped, optionally given a cost taken from ``"claim_amount"``
    and appended to ``obj['events']``.  Afterwards ``curStatus`` and, if
    present, the status mapped from ``"location_flag"`` are passed to
    ``handleStatusEvent`` for that step.

    ``row``, ``claim_id``, ``obj`` and ``curStatus`` are free names that
    the surrounding scope has to provide.
    """
    if fromDate == '' and toDate == '':
        return
    if fromDate == '':
        # Lone end date: treat it as the start of a one-point range.
        fromDate, toDate = toDate, ''

    current = util.toTime(fromDate)
    final = util.toTime(toDate) if toDate != '' else current

    def applyLocationFlag(flag):
        # Reads `current` at call time, like the inline lambda it replaces.
        return handleStatusEvent(
            current, STATUS_FLAG_MAP.get(flag, STATUS_UNKNOWN))

    while current <= final:
        for item in handleEvent(row, claim_id):
            item['time'] = current

            def applyCost(amount):
                return addCost(item, amount)

            handleKey(row, "claim_amount", MODE_OPTIONAL, applyCost)
            obj['events'].append(item)
        handleStatusEvent(current, curStatus)
        handleKey(row, "location_flag", MODE_OPTIONAL, applyLocationFlag)
        current = util.nextDay(current)
def readShelve(pid, settings, output):
    """Write the stored rows of one patient (or all patients) to ``output``.

    Parameters:
        pid: a patient id, or ``'--all'`` to process every id returned by
            ``getAll(settings)``.
        settings: mapping read for ``'delim'``, ``'quote'``, ``'row_split'``
            and ``'anonymize'`` (with ``'do'``, ``'age_columns'``,
            ``'date_columns'`` and ``'redact_columns'``).
        output: destination handed on to ``openDB`` / ``writeRow`` inside
            the ``out`` dictionary.

    When ``settings['anonymize']['do']`` is true the written id is the
    SHA-1 hex digest of the patient id, age columns are shifted by a
    non-zero random number in [-10, 10], date columns by a non-zero random
    number of days in [-3650, 3650], and redact columns are emptied.  One
    pair of shifts is drawn per patient.

    Rows whose column count or id does not match are reported on stderr
    and skipped.
    """
    pids = getAll(settings) if pid == '--all' else [pid]
    out = {
        'delim': settings['delim'],
        'quote': settings['quote'],
        'out': output,
    }
    anon_settings = settings['anonymize']
    anonymize = anon_settings['do']

    def offsets_of(all_hdrs, start, col_num, names):
        # Positions, relative to `start`, of the headers listed in `names`.
        return [
            ix - start
            for ix in range(start, start + col_num)
            if all_hdrs[ix] in names
        ]

    first = True
    for patientId in pids:
        if anonymize:
            # NOTE(review): an unsalted SHA-1 of the id and shifts drawn from
            # `random` are weak protection if ids are guessable -- confirm
            # this is acceptable for the data being exported.
            realId = hashlib.sha1(patientId).hexdigest()
            age_shift = 0
            while age_shift == 0:  # a zero shift would leave ages unchanged
                age_shift = random.randint(-10, 10)
            date_shift = 0
            while date_shift == 0:  # same for dates
                date_shift = random.randint(-365 * 10, 365 * 10)
        else:
            realId = patientId
        splitter = settings['row_split']
        (row_defs, length, all_hdrs) = openDB(patientId, settings, out, first)
        first = False
        for row_def in row_defs:
            start = row_def['start']
            col_num = row_def['col_num']
            skip = row_def['skip']
            # manipulation ixs apply before skipping
            age_ixs = offsets_of(
                all_hdrs, start, col_num, anon_settings['age_columns'])
            date_ixs = offsets_of(
                all_hdrs, start, col_num, anon_settings['date_columns'])
            redact_ixs = offsets_of(
                all_hdrs, start, col_num, anon_settings['redact_columns'])
            for row in row_def['data']:
                if row == '':
                    continue
                values = row.strip().split(splitter)
                # Validate the width before touching any column: a short row
                # would otherwise raise while anonymising or popping the id.
                # The raw row carries the id column on top of `col_num`.
                if len(values) != col_num + 1:
                    print("column mismatch! expected {0} got {1}: {2}".format(
                        str(col_num), str(len(values) - 1), row),
                        file=sys.stderr)
                    continue
                if anonymize:
                    for ix in age_ixs:
                        values[ix] = str(int(values[ix]) + age_shift)
                    for ix in date_ixs:
                        values[ix] = util.from_time(
                            util.shift_days(
                                util.toTime(values[ix]), date_shift))
                    for ix in redact_ixs:
                        values[ix] = ''
                row_id = values.pop(skip)
                if row_id != patientId:
                    # Diagnostics go to stderr so they never mix with data.
                    print("unexpected id! expected {0} got {1}: {2}".format(
                        patientId, row_id, row), file=sys.stderr)
                    continue
                writeRow(values, out, start, length, realId)
def emitNDC(date, ndc):
    """Append a ``TYPE_PRESCRIBED`` entry for one NDC code to ``obj['events']``.

    The entry is created with the id ``"ndc__" + ndc`` and ``claim_id`` and
    stamped with ``util.toTime(date)``.  If the row has a
    ``"location_flag"`` its mapped status (``STATUS_UNKNOWN`` when the flag
    is not in ``STATUS_FLAG_MAP``) is reported for that time; if it has a
    ``"prescribed_amount"`` that amount is added to the entry as cost.

    ``row``, ``claim_id`` and ``obj`` are not defined in this block and
    must come from the surrounding scope.
    """
    # TODO add provider/physician here as well?
    entry = createEntry(TYPE_PRESCRIBED, "ndc__" + ndc, claim_id)
    when = util.toTime(date)
    entry['time'] = when

    def reportLocation(flag):
        return handleStatusEvent(
            when, STATUS_FLAG_MAP.get(flag, STATUS_UNKNOWN))

    def attachCost(amount):
        return addCost(entry, amount)

    handleKey(row, "location_flag", MODE_OPTIONAL, reportLocation)
    handleKey(row, "prescribed_amount", MODE_OPTIONAL, attachCost)
    obj['events'].append(entry)
def emitNDC(date, ndc):
    """Record a prescription event for the NDC code ``ndc``.

    Builds a ``TYPE_PRESCRIBED`` entry (id ``"ndc__" + ndc``) for
    ``claim_id``, sets its ``'time'`` to ``util.toTime(date)``, lets
    ``handleKey`` process the optional ``"location_flag"`` and
    ``"prescribed_amount"`` fields of ``row``, and finally appends the
    entry to ``obj['events']``.

    ``row``, ``claim_id`` and ``obj`` are free names the surrounding scope
    has to provide.
    """
    # TODO add provider/physician here as well?
    prescription = createEntry(TYPE_PRESCRIBED, "ndc__" + ndc, claim_id)
    timestamp = util.toTime(date)
    prescription['time'] = timestamp

    # Unknown flags fall back to STATUS_UNKNOWN.
    onLocationFlag = lambda flag: handleStatusEvent(
        timestamp, STATUS_FLAG_MAP.get(flag, STATUS_UNKNOWN))
    handleKey(row, "location_flag", MODE_OPTIONAL, onLocationFlag)

    onAmount = lambda amount: addCost(prescription, amount)
    handleKey(row, "prescribed_amount", MODE_OPTIONAL, onAmount)

    obj['events'].append(prescription)
def to_time(self, value):
    """Convert ``value`` with ``util.toTime`` via its ``%Y%m%d`` string form.

    ``value`` must provide ``strftime``; the formatted string is passed to
    ``util.toTime`` and that result is returned.
    """
    compact = value.strftime("%Y%m%d")
    return util.toTime(compact)
def date():
    """Read the next literal and convert it with ``util.toTime``.

    Returns the converted value.  If the conversion raises, the failure is
    passed to ``err`` together with the span of the literal (from
    ``State.pos - len(lit)`` to ``State.pos``).

    NOTE(review): if ``err`` returns instead of raising, this function
    falls through and returns ``None`` -- confirm against ``err``.
    """
    lit = literal()
    try:
        return util.toTime(lit)
    # `as` replaces the Python-2-only `except Exception, e` form; it is
    # valid on Python 2.6+ and Python 3.  The broad catch is kept because
    # the exceptions raised by util.toTime are not visible here.
    except Exception as e:
        err(State.pos - len(lit), State.pos,
            "cannot convert to date: " + str(e))
while args: arg = args.pop(0) if arg == '--': break if arg == '-h': usage() if arg == '--num-cutoff': if not args or args[0] == '--': print('--num-cutoff requires number', file=sys.stderr) usage() num_cutoff = int(args.pop(0)) elif arg == '--age-time': if not args or args[0] == '--': print('--age-time requires a date', file=sys.stderr) usage() age_time = util.toTime(args.pop(0)) elif arg == '--from': if not args or args[0] == '--': print('--from requires a date', file=sys.stderr) usage() from_time = util.toTime(args.pop(0)) elif arg == '--to': if not args or args[0] == '--': print('--to requires a date', file=sys.stderr) usage() to_time = util.toTime(args.pop(0)) elif arg == '-w': if not args or args[0] == '--': print('-w requires whitelist file', file=sys.stderr) usage() if whitelist is None:
def readShelve(pid, settings, output):
    """Export the rows of one patient, or of every patient, to ``output``.

    Parameters:
        pid: patient id to export; the special value ``'--all'`` exports
            every id returned by ``getAll(settings)``.
        settings: mapping providing ``'delim'``, ``'quote'``,
            ``'row_split'`` and an ``'anonymize'`` mapping with ``'do'``,
            ``'age_columns'``, ``'date_columns'`` and ``'redact_columns'``.
        output: destination passed to ``openDB`` and ``writeRow`` as
            ``out['out']``.

    With anonymisation enabled each patient gets the SHA-1 hex digest of
    its id as output id, one non-zero random age shift in [-10, 10], and
    one non-zero random date shift in [-3650, 3650] days; columns listed
    in ``'redact_columns'`` are blanked.

    Malformed rows (wrong column count, or an id different from the
    patient being read) are reported on stderr and skipped.
    """
    if pid == '--all':
        pids = getAll(settings)
    else:
        pids = [pid]
    out = {
        'delim': settings['delim'],
        'quote': settings['quote'],
        'out': output,
    }
    anon_cfg = settings['anonymize']
    anonymize = anon_cfg['do']

    def relative_ixs(all_hdrs, start, col_num, wanted):
        # Offsets from `start` of the headers that appear in `wanted`.
        return [
            ix - start
            for ix in range(start, start + col_num)
            if all_hdrs[ix] in wanted
        ]

    first = True
    for patientId in pids:
        if anonymize:
            # NOTE(review): unsalted SHA-1 ids and `random`-based shifts are
            # weak if ids can be enumerated -- confirm this is acceptable.
            realId = hashlib.sha1(patientId).hexdigest()
            age_shift = 0
            while age_shift == 0:  # redraw: zero would not shift anything
                age_shift = random.randint(-10, 10)
            date_shift = 0
            while date_shift == 0:  # redraw: zero would not shift anything
                date_shift = random.randint(-365 * 10, 365 * 10)
        else:
            realId = patientId
        splitter = settings['row_split']
        (row_defs, length, all_hdrs) = openDB(patientId, settings, out, first)
        first = False
        for row_def in row_defs:
            start = row_def['start']
            col_num = row_def['col_num']
            skip = row_def['skip']
            # manipulation ixs apply before skipping
            age_ixs = relative_ixs(
                all_hdrs, start, col_num, anon_cfg['age_columns'])
            date_ixs = relative_ixs(
                all_hdrs, start, col_num, anon_cfg['date_columns'])
            redact_ixs = relative_ixs(
                all_hdrs, start, col_num, anon_cfg['redact_columns'])
            for row in row_def['data']:
                if row == '':
                    continue
                values = row.strip().split(splitter)
                # Check the width first so a short row is reported instead
                # of raising during anonymisation or when popping the id.
                # The raw row holds the id column in addition to `col_num`.
                if len(values) != col_num + 1:
                    print("column mismatch! expected {0} got {1}: {2}".format(
                        str(col_num), str(len(values) - 1), row),
                        file=sys.stderr)
                    continue
                if anonymize:
                    for ix in age_ixs:
                        values[ix] = str(int(values[ix]) + age_shift)
                    for ix in date_ixs:
                        values[ix] = util.from_time(
                            util.shift_days(
                                util.toTime(values[ix]), date_shift))
                    for ix in redact_ixs:
                        values[ix] = ''
                row_id = values.pop(skip)
                if row_id != patientId:
                    # Keep diagnostics on stderr, away from exported data.
                    print("unexpected id! expected {0} got {1}: {2}".format(
                        patientId, row_id, row), file=sys.stderr)
                    continue
                writeRow(values, out, start, length, realId)