tf.gather(tf.constant(types), x)), y)) ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test])) ds_test = ds_test.batch(len(idx_test), drop_remainder=False) ds_test = ds_test.map(lambda x, y: ( (five_p_loader(x, ragged_output=True), three_p_loader( x, ragged_output=True), ref_loader(x, ragged_output=True), alt_loader(x, ragged_output=True), strand_loader(x, ragged_output=True), tf.gather(tf.constant(types), x)), y)) histories = [] evaluations = [] weights = [] for i in range(3): sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [16, 16, 8, 8]) sample_encoder = SampleModels.Type(shape=(), dim=len(np.unique(types))) # mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model], sample_encoders=[sample_encoder.model], sample_layers=[64, ], output_dim=1, pooling='both', output_type='other', pooled_layers=[32, ]) mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model], sample_encoders=[sample_encoder.model], fusion='before', output_dim=1, pooling='both', output_type='other', pooled_layers=[ 32, ]) losses = ['mse'] mil.model.compile(loss=losses, metrics=['mse'], optimizer=tf.keras.optimizers.Adam( learning_rate=0.001, ))
##if using tcga cancer types
# Stratification label per sample: index of the largest entry along the last
# axis of samples['histology'] (presumably a one-hot histology encoding --
# TODO confirm).
y_strat = np.argmax(samples['histology'], axis=-1)

##if using NCI-T labels
# label_counts = sample_df['NCI-T Label'].value_counts().to_dict()
# mask = sample_df['NCI-T Label'].map(lambda x: label_counts.get(x, 0) >= 36)
# y_label = y_label[mask]
# counts = counts[mask]
# labels = [i for i in sorted(label_counts.keys()) if label_counts[i] >= 36]
# y_strat = sample_df['NCI-T Label'][mask].map(lambda x: labels.index(x)).values

# Quantile loss is used both as the training loss and as the metric.
losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]

# Two sample-level encoders: a pass-through for a single-value input and a
# type encoder sized to cover every label index present in y_strat.
pass_encoder = InstanceModels.PassThrough(shape=(1, ))
type_encoder = SampleModels.Type(shape=(), dim=max(y_strat) + 1)

# Load the pickled weights inside a context manager so the file handle is
# closed deterministically (the previous pickle.load(open(...)) leaked it).
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load this file if it comes from a trusted source.
with open(
        cwd / 'figures' / 'tmb' / 'tcga' / 'MDA_409' / 'results' /
        'run_naive_sample_tcga.pkl', 'rb') as weights_file:
    weights = pickle.load(weights_file)

# Model built from sample encoders only (no instance encoders), with
# quantile outputs.
mil = RaggedModels.MIL(
    sample_encoders=[pass_encoder.model, type_encoder.model],
    output_dim=1,
    mil_hidden=(64, 32, 16),
    output_type='quantiles',
    regularization=0,
    mode='none')

##test eval