def buildModelColumns(self):
    """Assemble the feature columns passed to the model.

    Returns:
        list: in order, the numeric columns 'Eintrittsalter' and
        'Verweildauer', an indicator column for 'Geschlecht', and
        8-dimensional embedding columns for 'Hauptdiagnose' and 'DK'.
    """
    fc = tf.feature_column
    options = self.dataset_options

    # Vocabulary for 'Geschlecht' comes from the dataset options.
    sex_column = fc.categorical_column_with_vocabulary_list(
        'Geschlecht', options.getFeatureCategories('Geschlecht'))

    # Numeric inputs.
    admission_age = fc.numeric_column('Eintrittsalter', dtype=tf.float32)
    length_of_stay = fc.numeric_column('Verweildauer', dtype=tf.float32)

    # 'Hauptdiagnose' takes its vocabulary from the dataset options, while
    # 'DK' uses the very-light grouping supplied by the helpers module.
    main_diagnosis = fc.categorical_column_with_vocabulary_list(
        'Hauptdiagnose', options.getFeatureCategories('Hauptdiagnose'))
    secondary_diagnosis = fc.categorical_column_with_vocabulary_list(
        'DK', helpers.getDKverylightGrouping())

    return [
        admission_age,
        length_of_stay,
        fc.indicator_column(sex_column),
        fc.embedding_column(categorical_column=main_diagnosis, dimension=8),
        fc.embedding_column(categorical_column=secondary_diagnosis, dimension=8),
    ]
def buildModelColumns(self):
    """Assemble the feature columns passed to the model.

    The 'main_diag' and 'diag' categorical columns share one embedding of
    dimension 128.

    Returns:
        list: the shared embedding columns first, followed by the numeric
        columns 'age_dsch' and 'los' and a 1-dimensional embedding column
        for 'gender'.
    """
    fc = tf.feature_column
    options = self.dataset_options

    # Categorical inputs whose vocabularies come from the dataset options.
    gender_column = fc.categorical_column_with_vocabulary_list(
        'gender', options.getFeatureCategories('gender'))
    main_diag_column = fc.categorical_column_with_vocabulary_list(
        'main_diag', options.getFeatureCategories('main_diag'))

    # Numeric inputs.
    age_column = fc.numeric_column('age_dsch', dtype=tf.float32)
    los_column = fc.numeric_column('los', dtype=tf.float32)

    # The other diagnoses use the very-light grouping from the helpers module.
    other_diag_column = fc.categorical_column_with_vocabulary_list(
        'diag', helpers.getDKverylightGrouping())

    feature_columns = fc.shared_embedding_columns(
        [main_diag_column, other_diag_column], dimension=128)
    feature_columns.extend([
        age_column,
        los_column,
        fc.embedding_column(categorical_column=gender_column, dimension=1),
    ])

    print('len(feature_columns): ' + str(len(feature_columns)))
    return feature_columns
def getDiagGroupNames(self):
    """Return the diagnosis group names for the configured grouping.

    Returns:
        The result of the helpers function matching ``self.grouping``
        ('verylightgrouping', 'lightgrouping' or 'grouping'), or an empty
        list for any other value.
    """
    scheme = self.grouping
    if scheme == 'verylightgrouping':
        return helpers.getDKverylightGrouping()
    if scheme == 'lightgrouping':
        return helpers.getDKlightGrouping()
    if scheme == 'grouping':
        return helpers.getDKgrouping()
    # Unknown grouping: no group names.
    return []
def __getGroupNames(self, group):
    """Return the column names for a feature subgroup, prefixed with 'Fall'.

    Args:
        group: subgroup identifier; one of 'CHOP', 'DK' or 'OE'.

    Returns:
        list: a new list whose first element is 'Fall', followed by the
        names returned by the grouping function for ``group``. For 'DK' the
        function is chosen by ``self.options.getGroupingName()``
        ('grouping', 'lightgrouping' or 'verylightgrouping').

    Raises:
        SystemExit: with exit status 1 when ``group`` or the configured DK
            grouping scheme is not recognised.
    """
    if group == 'CHOP':
        names = getCHOPgrouping()
    elif group == 'DK':
        # Ask for the configured scheme once instead of once per comparison.
        grouping_name = self.options.getGroupingName()
        if grouping_name == 'grouping':
            names = getDKgrouping()
        elif grouping_name == 'lightgrouping':
            names = getDKlightGrouping()
        elif grouping_name == 'verylightgrouping':
            names = getDKverylightGrouping()
        else:
            print('grouping scheme ist not known...exit')
            # A bare sys.exit() reports success (status 0); signal the failure.
            sys.exit(1)
    elif group == 'OE':
        names = getOEgrouping()
    else:
        print('group name is not known...exit')
        # A bare sys.exit() reports success (status 0); signal the failure.
        sys.exit(1)

    # Build a fresh list so the list handed back by the helper is not mutated.
    return ['Fall'] + list(names)
def buildModelColumns(self):
    """Assemble the feature columns passed to the model.

    Returns:
        list: in order, the numeric columns 'age_dsch' and 'los'; indicator
        columns for 'adm_src', 'private_flag', 'Short_Stay_ED_Flag' and
        'transfer_event_flag'; 2-dimensional embedding columns for 'gender',
        'event_type', 'end_type', 'facility_type', 'agency_type' and
        'purchaser'; and 8-dimensional embedding columns for 'main_diag'
        and 'diag'.
    """
    fc = tf.feature_column

    def vocabulary_column(feature_name):
        # Categorical column whose vocabulary comes from the dataset options.
        return fc.categorical_column_with_vocabulary_list(
            feature_name,
            self.dataset_options.getFeatureCategories(feature_name))

    gender = vocabulary_column('gender')
    adm_src = vocabulary_column('adm_src')
    # NOTE(review): 'adm_type' is built but never added to the returned list.
    adm_type = vocabulary_column('adm_type')
    event_type = vocabulary_column('event_type')
    end_type = vocabulary_column('end_type')
    facility_type = vocabulary_column('facility_type')
    agency_type = vocabulary_column('agency_type')
    private_flag = vocabulary_column('private_flag')
    purchaser = vocabulary_column('purchaser')
    short_stay_flag = vocabulary_column('Short_Stay_ED_Flag')
    transfer_event_flag = vocabulary_column('transfer_event_flag')
    main_diag = vocabulary_column('main_diag')

    age = fc.numeric_column('age_dsch', dtype=tf.float32)
    los = fc.numeric_column('los', dtype=tf.float32)

    # The other diagnoses use the very-light grouping from the helpers module.
    other_diag = fc.categorical_column_with_vocabulary_list(
        'diag', helpers.getDKverylightGrouping())

    feature_columns = [
        age,
        los,
        fc.indicator_column(adm_src),
        fc.indicator_column(private_flag),
        fc.indicator_column(short_stay_flag),
        fc.indicator_column(transfer_event_flag),
        fc.embedding_column(categorical_column=gender, dimension=2),
        fc.embedding_column(categorical_column=event_type, dimension=2),
        fc.embedding_column(categorical_column=end_type, dimension=2),
        fc.embedding_column(categorical_column=facility_type, dimension=2),
        fc.embedding_column(categorical_column=agency_type, dimension=2),
        fc.embedding_column(categorical_column=purchaser, dimension=2),
        fc.embedding_column(categorical_column=main_diag, dimension=8),
        fc.embedding_column(categorical_column=other_diag, dimension=8),
    ]

    print('len(feature_columns): ' + str(len(feature_columns)))
    return feature_columns
# NOTE(review): this excerpt starts inside a list literal whose opening (and
# the name it is assigned to) lies before the visible text.
'1', '2', '3', '4', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '99' ]

# Allowed string values per categorical feature, keyed by feature name.
CATEGORICAL_DATA['agency_type'] = [ '1', '2', '9', '10', '11', '12', '13', '14', '8' ]
CATEGORICAL_DATA['private_flag'] = ['N', 'Y']
CATEGORICAL_DATA['purchaser'] = [ '6', '17', '19', '20', '33', '34', '35', '55', '98', 'A0', '1', '2', '3', '4', '5', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '18', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7' ]
CATEGORICAL_DATA['Short_Stay_ED_Flag'] = ['N', 'Y']
# The category list for 'early_readmission_flag' is disabled here, although
# the feature is still listed in EXPLICIT_DATA_TYPES below.
#CATEGORICAL_DATA['early_readmission_flag'] = ['N', 'Y']
CATEGORICAL_DATA['transfer_event_flag'] = ['N', 'Y']
# The main-diagnosis vocabulary is the very-light grouping from helpers.
CATEGORICAL_DATA['main_diag'] = helpers.getDKverylightGrouping()

# Maps feature names to `str`.
# NOTE(review): presumably passed as explicit dtypes when loading the data so
# that code-like values such as '1' stay strings -- confirm against the loader.
EXPLICIT_DATA_TYPES = { 'gender': str, 'adm_src': str, 'adm_type': str, 'event_type': str, 'end_type': str, 'facility_type': str, 'agency_type': str, 'private_flag': str, 'purchaser': str, 'Short_Stay_ED_Flag': str, 'early_readmission_flag': str, 'transfer_event_flag': str }
# NOTE(review): this excerpt starts inside a list literal whose opening (and
# the name it is assigned to) lies before the visible text.
'NE Neue.Neueint', 'WE:Wiedereintr.', 'N1:Nierenstein1', 'N2:Nierenstein2', 'S: Selbsteinw.'];

# Allowed string values per categorical feature, keyed by feature name.
CATEGORICAL['Entlassart'] = ['iniDri', 'exPat', 'gSpit', 'vSpit', 'sSpit','Plan', 'inPat', 'iniBeh'];
CATEGORICAL['Eintrittsart'] = ['Ver', 'Not', 'Ang', 'Geb', 'Int', 'unb'];
CATEGORICAL['EntlassBereich'] = ['SaO', 'Med', 'Gyn', 'Oth', 'N.A.'];
CATEGORICAL['Versicherungsklasse'] = ['A', 'S', 'P', 'H'];
# Month abbreviations.
CATEGORICAL['Entlassmonat'] = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
CATEGORICAL['Aufnahmemonat'] = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
# Weekday abbreviations.
CATEGORICAL['Aufnahmetag'] = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'];
CATEGORICAL['Entlasstag'] = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'];
# Years 2011 through 2017, stored as strings.
CATEGORICAL['Entlassjahr'] = ['2011', '2012', '2013', '2014', '2015', '2016', '2017'];
CATEGORICAL['Aufnahmejahr'] = ['2011', '2012', '2013', '2014', '2015', '2016', '2017'];
CATEGORICAL['Liegestatus'] = ['kurz', 'norm', 'lang', 'vap', 'opti', 'unb'];
CATEGORICAL['Geschlecht'] = ['weiblich', 'maennlich'];
CATEGORICAL['Forschungskonsent'] = ['ein', 'unb'];
# These vocabularies are produced by the helpers module instead of being
# listed inline.
CATEGORICAL['Hauptdiagnose'] = helpers.getDKverylightGrouping();
CATEGORICAL['AufnehmOE'] = helpers.getOEgrouping();
CATEGORICAL['EntlassOE'] = helpers.getOEgrouping();
CATEGORICAL['DRGCode'] = helpers.getDRGgrouping()

# Names of the feature subgroups.
SUBGROUPS = ['OE', 'DK', 'CHOP']

# NOTE(review): presumably the number of days within which a return visit
# counts as an early readmission -- confirm against the code that uses it.
NUM_DAYS_READMISSION = 18;
# NOTE(review): the constants below look like column names of the dataset --
# confirm against the data files.
EARLY_READMISSION_FLAG = 'Wiederkehrer';
EVENT_FLAG = 'Fall';
HAUPTDIAGNOSE = 'Hauptdiagnose';
NEBENDIAGNOSE = 'DK'

# Names of additional features.
# NOTE(review): judging by the names these are derived ratios/products of
# other features; where they are computed is not visible here.
NEW_FEATURES = ['previous_visits', 'ratio_los_age', 'ratio_numDK_age', 'ratio_los_numDK', 'ratio_numCHOP_age', 'ratio_los_numOE', 'ratio_numOE_age', 'mult_los_numCHOP', 'mult_equalOE_numDK', 'ratio_drg_los_alos'];
import sys
import shutil

import tensorflow as tf
from tensorflow.python.summary import summary

import helpers.helpers as helpers

# Diagnosis group names (very-light grouping), fetched once at import time.
diag_group_names = helpers.getDKverylightGrouping()


class AutoEncoderEstimator:
    """Holds the feature columns and flags for an estimator.

    NOTE(review): only the beginning of this class is visible in the
    reviewed excerpt; how ``estimator`` gets built is not shown.
    """

    def __init__(self, feature_columns, flags):
        """Store the configuration; ``estimator`` starts out as None.

        Args:
            feature_columns: stored unchanged on the instance.
            flags: stored unchanged on the instance.
        """
        self.feature_columns = feature_columns
        self.flags = flags
        self.estimator = None
        return

    def _add_hidden_layer_summary(self, value, tag):
        """Emit summaries for a layer output.

        Writes a scalar summary with the fraction of zero values in
        ``value`` and a histogram summary of ``value``, both named under
        ``tag``.
        """
        summary.scalar('%s/fraction_of_zero_values' % tag, tf.nn.zero_fraction(value))
        summary.histogram('%s/activation' % tag, value)

    def _dense_batch_relu(self, input, num_nodes, phase, layer_name, batchnorm, dropout):
        """Apply a dense ReLU layer, with batch normalization if requested.

        NOTE(review): the remainder of this method lies outside the reviewed
        excerpt; the path taken when ``batchnorm`` is false and the use of
        ``dropout`` are not visible here.
        """
        if batchnorm:
            # Dense layer with ReLU activation, then batch normalization;
            # `phase` is passed as the `training` argument.
            out = tf.layers.dense(input, num_nodes, activation=tf.nn.relu, name=layer_name)
            out = tf.layers.batch_normalization(out, training=phase)
# Load the main-diagnosis embedding weights of a trained model, write a TSV
# label file next to them, and project the weights to 2-D with PCA and t-SNE.
dirNN = '/Users/towyku74/UniBas/sciCore/projects/PATREC/trained_models/dev/nz_20012011_reduction_FUSION_embedding_verylightgrouping_20_10_10_dropout_0.5_learningrate_0.05_batchnorm_True_batchsize_640/'
filename_weights_main_diag = dirNN + 'weights_embedding_main_diag.npy'
weights = np.load(filename_weights_main_diag)

num_diags = 2600
num_categories = 26
# Each main category covers an equally sized block of consecutive diagnoses.
diags_per_category = num_diags // num_categories

# labels[i] holds the main-category index of diagnosis i, and
# labels_maincat[i] the matching entry of `alphabet` (defined outside this
# excerpt).
cnt = 0
labels = np.zeros(num_diags)
labels_maincat = []
for k in range(num_categories):
    for _ in range(diags_per_category):
        labels[cnt] = k
        labels_maincat.append(alphabet[k])
        cnt += 1

labels_finegrained = helpers.getDKverylightGrouping()

# One header row, then one "<main category>\t<fine-grained label>" row per
# diagnosis. The context manager closes the file even if a write fails,
# e.g. when labels_finegrained is shorter than labels_maincat.
filename_labels = dirNN + 'labels_cat.tsv'
with open(filename_labels, 'w') as file_labels:
    file_labels.write('main_category' + '\t' + 'category' + '\n')
    for k, main_category in enumerate(labels_maincat):
        file_labels.write(main_category + '\t' + labels_finegrained[k] + '\n')

# One colour per main category.
colors = plt.cm.rainbow(np.linspace(0, 1, num_categories))

# 2-D projections of the embedding weights.
pca = PCA(n_components=2)
weights_2d_pca = pca.fit_transform(weights)
tsne = TSNE(n_components=2)
weights_2d_tsne = tsne.fit_transform(weights)