Exemplo n.º 1
0
def main():
    """Train the molecule autoencoder using command-line settings.

    Steps:
      1. Parse options with ``get_arguments()``.
      2. Load the train/test data and the charset from ``args.data``.
      3. Restore the model from ``args.model`` when that file exists,
         otherwise create a new one; either way the latent size is
         ``args.latent_dim``.
      4. Fit the autoencoder with the training data as both input and
         target, validating against the test data in the same way.
    """
    options = get_arguments()
    train_set, test_set, charset = load_dataset(options.data)

    vae = MoleculeVAE()
    if not os.path.isfile(options.model):
        # Nothing saved at the model path yet: build a new model.
        vae.create(charset, latent_rep_size=options.latent_dim)
    else:
        vae.load(charset, options.model, latent_rep_size=options.latent_dim)

    training_callbacks = [
        # Checkpoints go to the same path that is used for resuming above.
        ModelCheckpoint(filepath=options.model,
                        verbose=1,
                        save_best_only=True),
        # Multiply the learning rate by 0.2 after 3 epochs without
        # val_loss improvement, down to a floor of 1e-4.
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.2,
                          patience=3,
                          min_lr=0.0001),
    ]

    fit_kwargs = {
        'shuffle': True,
        'epochs': options.epochs,
        'batch_size': options.batch_size,
        'callbacks': training_callbacks,
        'validation_data': (test_set, test_set),
    }
    # Input and target are the same array: the network learns to
    # reconstruct what it is given.
    vae.autoencoder.fit(train_set, train_set, **fit_kwargs)
Exemplo n.º 2
0
def main():
    """Seed NumPy, then train the molecule autoencoder from CLI options.

    The project and Keras imports are performed inside the function,
    after ``np.random.seed`` has run (presumably so that seeding happens
    before those modules initialise -- confirm with the author).

    The model is resumed from ``args.model`` if that file exists and is
    created from scratch otherwise. Training uses the training data as
    both input and target and validates on the test data likewise.
    """
    cli = get_arguments()
    np.random.seed(cli.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import (one_hot_array, one_hot_index,
                                 from_one_hot_array,
                                 decode_smiles_from_indexes, load_dataset)
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    x_train, x_test, charset = load_dataset(cli.data)

    vae = MoleculeVAE()
    resume = os.path.isfile(cli.model)
    if resume:
        vae.load(charset, cli.model, latent_rep_size=cli.latent_dim)
    else:
        vae.create(charset, latent_rep_size=cli.latent_dim)

    # Save to the path we resume from; save_best_only=True is passed so
    # only improving epochs overwrite the file.
    save_best = ModelCheckpoint(filepath=cli.model, verbose=1,
                                save_best_only=True)
    # Scale the learning rate by 0.2 after 3 stagnant val_loss epochs,
    # never going below 1e-4.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                    patience=3, min_lr=0.0001)

    validation_pair = (x_test, x_test)
    vae.autoencoder.fit(
        x_train,
        x_train,
        shuffle=True,
        epochs=cli.epochs,
        batch_size=cli.batch_size,
        callbacks=[save_best, lr_schedule],
        validation_data=validation_pair,
    )
Exemplo n.º 3
0
def main():
    """Train a new MoleculeVAE on a small data subset and save its history.

    Seeds NumPy with ``RANDOM_SEED``, loads ``data/processed.h5``, creates
    a model with a latent size of 292 and fits it on the first 1000
    training samples (validated on the first 1000 test samples) for
    ``NUM_EPOCHS`` epochs. The best checkpoint is written to ``model.h5``
    and the Keras history dictionary is pickled to ``trainHistoryDict``.
    """
    np.random.seed(RANDOM_SEED)

    data_train, data_test, charset = load_dataset('data/processed.h5')
    print("Charset", charset)
    model = MoleculeVAE()
    model.create(charset, latent_rep_size=292)

    # Multiply the learning rate by 0.2 after 3 epochs without val_loss
    # improvement, down to a floor of 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)
    checkpointer = ModelCheckpoint(filepath='model.h5',
                                   verbose=1,
                                   save_best_only=True)

    # `epochs` is the Keras 2 name of the former `nb_epoch` argument; the
    # old spelling is deprecated and rejected by current Keras releases.
    history = model.autoencoder.fit(data_train[:1000],
                                    data_train[:1000],
                                    shuffle=True,
                                    epochs=NUM_EPOCHS,
                                    batch_size=100,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test[:1000],
                                                     data_test[:1000]))
    # Persist only the plain metrics dict, not the History object itself.
    with open('trainHistoryDict', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)
Exemplo n.º 4
0
def main():
    """Seed NumPy, then train the molecule autoencoder from CLI options.

    The model is resumed from ``args.model`` when that file exists and is
    created from scratch otherwise; in both cases the latent size is
    ``args.latent_dim``. Training uses the training data as both input
    and target and validates on the test data in the same way.
    """
    args = get_arguments()
    np.random.seed(args.random_seed)

    # Imported after seeding, as in the original; only the names this
    # function actually uses are pulled in.
    from molecules.model import MoleculeVAE
    from molecules.utils import load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    # Checkpoints are written to the same path that is used for resuming.
    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    # Multiply the learning rate by 0.2 after 3 epochs without val_loss
    # improvement, down to a floor of 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # `epochs` is the Keras 2 name of the former `nb_epoch` argument; the
    # old spelling is deprecated and rejected by current Keras releases.
    model.autoencoder.fit(
        data_train,
        data_train,
        shuffle=True,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=[checkpointer, reduce_lr],
        validation_data=(data_test, data_test),
    )
Exemplo n.º 5
0
def main():
    """Train the molecule autoencoder from a generator-backed dataset.

    Reads the ``'table'`` key of the HDF file at ``args.data``, takes its
    ``structure`` column and feeds it through ``SmilesDataGenerator`` to
    obtain train/test batch generators. The model is resumed from
    ``args.model`` when that file exists, otherwise created, and is then
    trained with ``fit_generator``.
    """
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data = pd.read_hdf(args.data, 'table')
    structures = data['structure']

    # import gzip
    # filepath = args.data
    # structures = [line.split()[0].strip() for line in gzip.open(filepath) if line]

    # can also use CanonicalSmilesDataGenerator
    datobj = SmilesDataGenerator(structures, MAX_LEN,
                                 test_split=args.test_split,
                                 random_seed=args.random_seed)
    # Integer train/test ratio, used to scale the validation size below.
    # NOTE(review): this is 0 when test_split > 0.5, which makes the
    # division further down raise ZeroDivisionError -- confirm the
    # accepted range of --test_split.
    test_divisor = int((1 - datobj.test_split) / (datobj.test_split))
    train_gen = datobj.train_generator(args.batch_size)
    test_gen = datobj.test_generator(args.batch_size)

    # The generators yield 3-tuples; keep only the first element and use it
    # as both input and target (the remaining two, including the weights,
    # are dropped).
    train_gen = ((tens, tens) for (tens, _, _) in train_gen)
    test_gen = ((tens, tens) for (tens, _, _) in test_gen)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(datobj.chars, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(datobj.chars, latent_rep_size=args.latent_dim)

    # Checkpoints are written to the same path that is used for resuming.
    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    # Multiply the learning rate by 0.2 after 3 epochs without val_loss
    # improvement, down to a floor of 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # The call already used the Keras 2 `epochs` keyword; the remaining
    # Keras 1 spellings are replaced by their Keras 2 equivalents
    # (`nb_val_samples` -> `validation_steps`,
    #  `pickle_safe` -> `use_multiprocessing`). Values are unchanged.
    # NOTE(review): Keras 2 counts batches, not samples, for the second
    # positional argument and for `validation_steps` -- confirm the unit
    # of args.epoch_size.
    model.autoencoder.fit_generator(
        train_gen,
        args.epoch_size,
        epochs=args.epochs,
        callbacks=[checkpointer, reduce_lr],
        validation_data=test_gen,
        validation_steps=args.epoch_size / test_divisor,
        use_multiprocessing=True,
    )
Exemplo n.º 6
0
def main():
    """Train the molecule autoencoder from a generator-backed dataset.

    Reads the ``'table'`` key of the HDF file at ``args.data``, takes its
    ``structure`` column and feeds it through ``SmilesDataGenerator`` to
    obtain train/test batch generators. The model is resumed from
    ``args.model`` when that file exists, otherwise created, and is then
    trained with ``fit_generator``.
    """
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data = pd.read_hdf(args.data, 'table')
    structures = data['structure']

    # import gzip
    # filepath = args.data
    # structures = [line.split()[0].strip() for line in gzip.open(filepath) if line]

    # can also use CanonicalSmilesDataGenerator
    datobj = SmilesDataGenerator(structures, MAX_LEN,
                                 test_split=args.test_split,
                                 random_seed=args.random_seed)
    # Integer train/test ratio, used to scale the validation size below.
    # NOTE(review): this is 0 when test_split > 0.5, which makes the
    # division further down raise ZeroDivisionError -- confirm the
    # accepted range of --test_split.
    test_divisor = int((1 - datobj.test_split) / (datobj.test_split))
    train_gen = datobj.train_generator(args.batch_size)
    test_gen = datobj.test_generator(args.batch_size)

    # The generators yield 3-tuples; keep only the first element and use it
    # as both input and target (the remaining two, including the weights,
    # are dropped).
    train_gen = ((tens, tens) for (tens, _, _) in train_gen)
    test_gen = ((tens, tens) for (tens, _, _) in test_gen)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(datobj.chars, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(datobj.chars, latent_rep_size=args.latent_dim)

    # Checkpoints are written to the same path that is used for resuming.
    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    # Multiply the learning rate by 0.2 after 3 epochs without val_loss
    # improvement, down to a floor of 1e-4.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # Keras 1 keyword names replaced by their Keras 2 equivalents
    # (`nb_epoch` -> `epochs`, `nb_val_samples` -> `validation_steps`,
    #  `pickle_safe` -> `use_multiprocessing`). Values are unchanged.
    # NOTE(review): Keras 2 counts batches, not samples, for the second
    # positional argument and for `validation_steps` -- confirm the unit
    # of args.epoch_size.
    model.autoencoder.fit_generator(
        train_gen,
        args.epoch_size,
        epochs=args.epochs,
        callbacks=[checkpointer, reduce_lr],
        validation_data=test_gen,
        validation_steps=args.epoch_size / test_divisor,
        use_multiprocessing=True,
    )
Exemplo n.º 7
0
from molecules.model import MoleculeVAE
from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
    decode_smiles_from_indexes, load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Script-level training configuration. Only LATENT_DIM and RANDOM_SEED are
# used in the statements below; NUM_EPOCHS and BATCH_SIZE are presumably
# consumed by the fit call that follows -- confirm.
NUM_EPOCHS = 100
BATCH_SIZE = 10
LATENT_DIM = 128
RANDOM_SEED = 123

# Seed NumPy with the fixed constant instead of a command-line argument.
np.random.seed(RANDOM_SEED)  #args.random_seed)

# Load the train/test data and charset, then always build a new model
# (the load-from-file path is commented out).
data_train, data_test, charset = load_dataset('./data/processed.h5')
model = MoleculeVAE()
#model.load(charset, args.model, latent_rep_size = args.latent_dim)
model.create(charset, latent_rep_size=LATENT_DIM)

# The checkpoint filename is a template with {epoch} and {val_loss}
# placeholders, so it varies with the epoch number and validation loss.
checkpointer = ModelCheckpoint(
    filepath='./test_models/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, down to a floor of 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2,
                              patience=3,
                              min_lr=0.0001)

# Keep only the first training sample.
# NOTE(review): looks like a debugging/smoke-test setting -- confirm it is
# intentional before running a real training job.
data_train = data_train[:1]
model.autoencoder.fit(data_train,
                      data_train,
                      shuffle=True,