Пример #1
0
    [pos_loader, bin_loader, chr_loader],
    [five_p_loader, three_p_loader, ref_loader, alt_loader, strand_loader],
]

# Regression target: log(TMB + 1), where TMB is the non-synonymous
# mutation count normalized by the Agilent panel's coding size in Mb.
agilent_cds_mb = panels.loc[panels['Panel'] == 'Agilent_kit']['cds'].values[0] / 1e6
y_label = np.log(sample_df['non_syn_counts'].values / agilent_cds_mb + 1)[:, np.newaxis]
# Stratification label: index of each sample's histology class.
y_strat = np.argmax(samples['histology'], axis=-1)

losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]

# One encoder per loader group: scalar pass-through, genomic position
# bins, and the flanking-sequence encoder with a small fusion layer.
encoders = [
    InstanceModels.PassThrough(shape=(1,)),
    InstanceModels.VariantPositionBin(24, 100),
    InstanceModels.VariantSequence(6, 4, 2, [16, 16, 8, 8], fusion_dimension=32),
]

all_weights = [
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
            'run_naive.pkl', 'rb')),
    pickle.load(
        open(
            cwd / 'figures' / 'tmb' / 'tcga' / 'VICC_01_R2' / 'results' /
Пример #2
0
# Train several independent replicates; collect their starting weights.
runs = 3
initial_weights = []
losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]
# Halt when validation quantile loss stalls, rolling back to the best epoch.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_QL', min_delta=0.0001, patience=40, mode='min',
        restore_best_weights=True)
]

# Build one freshly-initialized MIL model per run and stash its starting
# weights. (Position-bin and sequence encoder variants were also explored
# here; only the pass-through encoder is active.)
for run in range(runs):
    pass_encoder = InstanceModels.PassThrough(shape=(1,))
    mil = RaggedModels.MIL(
        instance_encoders=[pass_encoder.model],
        output_dim=1,
        pooling='sum',
        mil_hidden=(64, 32, 16),
        output_type='quantiles',
        regularization=0,
    )
    mil.model.compile(
        loss=losses,
        metrics=metrics,
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    )
    initial_weights.append(mil.model.get_weights())
Пример #3
0
# Test split as one batch over all held-out indices.
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)

def _test_features(x, y):
    # Gather each per-variant tensor from D for the sampled indices x,
    # preserving the (5p, 3p, ref, alt, strand, cds) ordering.
    int_keys = ('seq_5p', 'seq_3p', 'seq_ref', 'seq_alt')
    feats = tuple(tf.gather(tf.constant(D[k], dtype=tf.int32), x) for k in int_keys)
    feats += (tf.gather(tf.constant(D['strand_emb'], dtype=tf.float32), x),
              tf.gather(tf.constant(D['cds_emb'], dtype=tf.float32), x))
    return feats, y

ds_test = ds_test.map(_test_features)



# Sample-level sequence encoder feeding a MIL head with no instance
# pooling ('none' mode); trained with cross-entropy.
sequence_encoder = InstanceModels.VariantSequence(
    6, 4, 2, [64, 64, 64, 64], fusion_dimension=128, use_frame=True)
mil = RaggedModels.MIL(
    instance_encoders=[],
    sample_encoders=[sequence_encoder.model],
    output_dim=y_label.shape[-1],
    output_type='other',
    mil_hidden=[128, 128, 64, 32],
    mode='none')
losses = [Losses.CrossEntropy()]
mil.model.compile(
    loss=losses,
    metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
    weighted_metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

# Early stop on weighted validation cross-entropy; restore best weights.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_weighted_CE', min_delta=0.001, patience=10, mode='min',
        restore_best_weights=True)
]


mil.model.fit(ds_train, steps_per_epoch=50,
              validation_data=ds_valid,
              epochs=10000,
              callbacks=callbacks,
Пример #4
0
# Single-batch test dataset over the held-out indices.
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)

def _gather_features(x, y):
    # Index every sequence/strand tensor by the batch's sample indices,
    # keeping the (5p, 3p, ref, alt, strand) ordering.
    seq_keys = ('seq_5p', 'seq_3p', 'seq_ref', 'seq_alt')
    feats = tuple(tf.gather(tf.constant(D[k], dtype=tf.int32), x) for k in seq_keys)
    feats += (tf.gather(tf.constant(D['strand_emb'], dtype=tf.float32), x),)
    return feats, y

ds_test = ds_test.map(_gather_features)

# Sequence encoder plus a pooling-free MIL classifier head.
sequence_encoder = InstanceModels.VariantSequence(
    6, 4, 2, [16, 16, 8, 8], fusion_dimension=128)
mil = RaggedModels.MIL(
    instance_encoders=[],
    sample_encoders=[sequence_encoder.model],
    output_dim=y_label.shape[-1],
    output_type='other',
    mil_hidden=[128, 128],
    mode='none')
losses = [Losses.CrossEntropy()]
mil.model.compile(
    loss=losses,
    metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
    weighted_metrics=[Metrics.Accuracy(), Metrics.CrossEntropy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
Пример #5
0
     alt_loader(x, ragged_output=True), strand_loader(x, ragged_output=True),
     tf.gather(tf.constant(types), x)), y))

# Test split: one batch of all held-out samples, mapping indices to the
# ragged per-variant features plus the gathered `types` value per sample.
ds_test = tf.data.Dataset.from_tensor_slices((idx_test, y_label[idx_test]))
ds_test = ds_test.batch(len(idx_test), drop_remainder=False)
ds_test = ds_test.map(
    lambda x, y: ((five_p_loader(x, ragged_output=True),
                   three_p_loader(x, ragged_output=True),
                   ref_loader(x, ragged_output=True),
                   alt_loader(x, ragged_output=True),
                   strand_loader(x, ragged_output=True),
                   tf.gather(tf.constant(types), x)),
                  y))

# Accumulators for the per-run training artifacts collected below.
histories, evaluations, weights = [], [], []
for i in range(3):
    sequence_encoder = InstanceModels.VariantSequence(6, 4, 2, [16, 16, 8, 8])
    sample_encoder = SampleModels.Type(shape=(), dim=len(np.unique(types)))
    # mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model], sample_encoders=[sample_encoder.model], sample_layers=[64, ], output_dim=1, pooling='both', output_type='other', pooled_layers=[32, ])
    mil = RaggedModels.MIL(instance_encoders=[sequence_encoder.model],
                           sample_encoders=[sample_encoder.model],
                           fusion='before',
                           output_dim=1,
                           pooling='both',
                           output_type='other',
                           pooled_layers=[
                               32,
                           ])
    losses = ['mse']
    mil.model.compile(loss=losses,
                      metrics=['mse'],
                      optimizer=tf.keras.optimizers.Adam(
Пример #6
0
                                           y,
                                          tf.gather(tf.constant(y_weights, dtype=tf.float32), x)
                                           ))

    # Validation split as a single batch; each element is
    # ((pos, bin, chr) ragged features, label, per-sample weight).
    ds_valid = tf.data.Dataset.from_tensor_slices((idx_valid, y_label[idx_valid]))
    ds_valid = ds_valid.batch(len(idx_valid), drop_remainder=False)
    ds_valid = ds_valid.map(
        lambda x, y: ((pos_loader(x, ragged_output=True),
                       bin_loader(x, ragged_output=True),
                       chr_loader(x, ragged_output=True)),
                      y,
                      tf.gather(tf.constant(y_weights, dtype=tf.float32), x)))

    while True:
        position_encoder = InstanceModels.VariantPositionBin(24, 100)
        mil = RaggedModels.MIL(instance_encoders=[position_encoder.model], output_dim=2, pooling='sum', mil_hidden=(64, 32, 16, 8), output_type='anlulogits')

        mil.model.compile(loss=losses,
                          metrics=[Metrics.CrossEntropy(), Metrics.Accuracy()],
                          weighted_metrics=[Metrics.CrossEntropy(), Metrics.Accuracy()],
                          optimizer=tf.keras.optimizers.Adam(learning_rate=0.005,
                                                             clipvalue=10000))
        mil.model.fit(ds_train,
                      steps_per_epoch=20,
                      validation_data=ds_valid,
                      epochs=10000,
                      callbacks=callbacks)


        eval = mil.model.evaluate(ds_valid)
Пример #7
0
    for group, event in zip(cancer_labels, y_label[:, 1])
])
# Inverse-frequency sample weights over the stratification classes,
# normalized to sum to one.
strata, strata_counts = np.unique(y_strat, return_counts=True)
class_counts = dict(zip(strata, strata_counts))
y_weights = np.array([1 / class_counts[label] for label in y_strat])
y_weights = y_weights / np.sum(y_weights)

weights = []
# Stop on stalled validation cross-entropy, restoring the best epoch.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_CE', min_delta=0.0001, patience=50, mode='min',
        restore_best_weights=True)
]
losses = [Losses.CrossEntropy(from_logits=False)]
sequence_encoder = InstanceModels.VariantSequence(20, 4, 2, [8, 8, 8, 8])
mil = RaggedModels.MIL(
    instance_encoders=[sequence_encoder.model],
    output_dim=2,
    pooling='sum',
    mil_hidden=(64, 64, 32, 16),
    output_type='classification_probability')
mil.model.compile(
    loss=losses,
    metrics=[Metrics.CrossEntropy(from_logits=False), Metrics.Accuracy()],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=10000))
# Snapshot the freshly-initialized weights so each fold restarts from
# the same state.
initial_weights = mil.model.get_weights()

##stratified K fold for test
for idx_train, idx_test in StratifiedKFold(n_splits=9,
                                           random_state=0,
Пример #8
0
# A single loader group: just the constant-ones instance feature.
loaders = [[ones_loader]]

# Target: log(non-synonymous mutations per Mb of Agilent-kit CDS + 1)
# as a column vector; stratify on the histology class index.
cds_mb = panels.loc[panels['Panel'] == 'Agilent_kit']['cds'].values[0] / 1e6
y_label = np.log(sample_df['non_syn_counts'].values / cds_mb + 1)[:, np.newaxis]
y_strat = np.argmax(samples['histology'], axis=-1)

losses = [Losses.QuantileLoss()]
metrics = [Metrics.QuantileLoss()]

# Only the naive pass-through encoder is evaluated in this script.
encoders = [InstanceModels.PassThrough(shape=(1,))]

# Pre-trained weights for the naive run. Load through a context manager
# so the pickle file handle is closed deterministically — the original
# called open() inline and leaked the handle.
_run_naive_path = (cwd / 'figures' / 'tmb' / 'tcga' / 'nonsyn_table' /
                   'DFCI_ONCO' / 'results' / 'run_naive.pkl')
with open(_run_naive_path, 'rb') as run_file:
    all_weights = [pickle.load(run_file)]

# Keyed results accumulator for the evaluation loop below.
results = {}

for encoder, loaders, weights, name in zip(encoders, loaders, all_weights,
                                           ['naive']):

    mil = RaggedModels.MIL(instance_encoders=[encoder.model],