# NOTE(review): this fragment was reconstructed from a single collapsed line.
# The nesting below (only the assignment inside the `if`, all four statements
# inside the `for`) is the most plausible reading -- confirm against the
# original file.

# An optional second command-line argument is parsed as an integer epoch
# threshold.
if len(sys.argv) > 2:
    threshold_epoch = int(sys.argv[2])

# Configuration handed to DatasetOptions for the training data.
dict_data_train = {
    'dir_data': DIRPROJECT + 'data/',
    'data_prefix': 'nz',
    'dataset': '20122016',
    'encoding': 'embedding',
    'newfeatures': None,
    'featurereduction': {'method': 'FUSION'},
    'grouping': 'verylightgrouping'
}
dataset_options_train = DatasetOptions(dict_data_train)

# Diagnosis group names of the dataset and the indices derived from them.
diag_group_names = dataset_options_train.getDiagGroupNames()
indices_diag_codes = getDiagCodesIndices(diag_group_names)

# One rainbow-colormap colour per ICD-10 main group.
main_groups = icd10_chapters.getMainGroups()
num_colors = len(main_groups)
colors = plt.cm.rainbow(np.linspace(0, 1, num_colors))

num_diags = len(indices_diag_codes)

# Every file in the model directory whose name starts with 'basic_encodings_'.
filenames_encodings = glob.glob(dir_model + 'basic_encodings_*')
var_encodings = []  # not filled within the visible part of the script

# NOTE(review): sorted() orders the paths as plain strings, so unpadded epoch
# numbers would not be visited in numeric order (e.g. '_10' before '_2') --
# confirm this is acceptable for whatever consumes the loop results.
for l, f in enumerate(sorted(filenames_encodings)):
    print(f)
    # The epoch is the integer after the last '_' in the file name, taken
    # from the last '/'-separated path component with everything from the
    # first '.' onwards removed.
    epoch = int(f.split('/')[-1].split('.')[0].split('_')[-1])
    print('epoch: ' + str(epoch))
    basic_encodings = np.load(f)
def _plot_grouped_scatter(points_2d, colors, num_subcategories, title):
    """Scatter-plot consecutive row blocks of ``points_2d`` in a new figure.

    Block ``k`` consists of the rows ``k * num_subcategories`` up to (but not
    including) ``(k + 1) * num_subcategories``. It is drawn in ``colors[k]``
    and labelled with the k-th uppercase ASCII letter. Blocks that lie beyond
    the end of ``points_2d`` are empty and simply contribute a legend entry.

    Args:
      points_2d: 2-D array; column 0 is plotted on x, column 1 on y.
      colors: Sequence of colours, one per block.
      num_subcategories: Number of consecutive rows that form one block.
      title: Title of the figure.
    """
    plt.figure()
    for k, (letter, rgba) in enumerate(zip(string.ascii_uppercase, colors)):
        block = points_2d[k * num_subcategories:(k + 1) * num_subcategories]
        # Use `color=` rather than `c=`: a single RGBA row passed through `c`
        # is ambiguous and is interpreted as four values to colormap whenever
        # the block happens to contain exactly four points.
        plt.scatter(
            block[:, 0],
            block[:, 1],
            label=letter,
            alpha=0.5,
            s=100,
            color=rgba)
    plt.legend()
    plt.title(title)
    plt.draw()


def analyze(flags_obj):
    """Plot 2-D PCA and t-SNE projections of the autoencoder encodings.

    Creates an ``AutoEncoderModel`` for the dataset options defined below,
    obtains the encodings returned by its ``analyze()`` method, projects them
    to two dimensions once with PCA and once with t-SNE, and shows one scatter
    plot per projection. Rows of the encodings are coloured in consecutive
    blocks whose size depends on the grouping scheme (100, 10 or 1 rows per
    block). The call blocks in ``plt.show()`` until the figures are closed.

    Args:
      flags_obj: An object containing parsed flag values.

    Raises:
      SystemExit: With status 1 if the data prefix or the grouping scheme of
        the dataset options is not recognised.
    """
    dict_data_train = {
        'dir_data': DIRPROJECT + 'data/',
        'data_prefix': 'nz',
        'dataset': '20072016',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': {'method': 'FUSION'},
        'grouping': 'verylightgrouping'
    }
    dataset_options_train = DatasetOptions(dict_data_train)
    dataset_options_eval = None

    if dict_data_train['data_prefix'] == 'nz':
        feature_columns = FeatureColumnsAutoEncoderNZ(
            dataset_options=dataset_options_train)
    else:
        print('unknown data prefix..exit')
        # Non-zero status so callers can tell the run failed.
        sys.exit(1)

    # Rows per colour block for each known grouping scheme. Validated before
    # the model is built so a bad configuration fails before the expensive
    # encoding / PCA / t-SNE steps run.
    subcategories_by_grouping = {
        'verylightgrouping': 100,
        'lightgrouping': 10,
        'grouping': 1,
    }
    num_subcategories = subcategories_by_grouping.get(
        dataset_options_train.getGroupingName())
    if num_subcategories is None:
        print('grouping scheme is unknown...exit')
        sys.exit(1)

    dict_dataset_options = {
        'train': dataset_options_train,
        'eval': dataset_options_eval,
        'test': None
    }

    nn = AutoEncoderModel('analysis', dict_dataset_options, feature_columns,
                          flags_obj)
    basic_encodings = nn.analyze()

    # One colour per label letter A-Z (26 blocks).
    # NOTE(review): presumably the letter is the leading character of the
    # diagnosis code -- confirm against the dataset's code ordering.
    num_colors = len(string.ascii_uppercase)
    colors = plt.cm.rainbow(np.linspace(0, 1, num_colors))

    weights_2d_pca = PCA(n_components=2).fit_transform(basic_encodings)
    weights_2d_tsne = TSNE(n_components=2).fit_transform(basic_encodings)

    _plot_grouped_scatter(weights_2d_pca, colors, num_subcategories, 'pca')
    _plot_grouped_scatter(weights_2d_tsne, colors, num_subcategories, 't-sne')
    plt.show()