Example #1
def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    model.autoencoder.fit(data_train,
                          data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
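
Several examples in this section call a get_arguments() helper that is never shown. Below is a minimal argparse sketch reconstructing it from the attributes the snippets actually read (args.data, args.model, args.latent_dim, args.epochs, args.batch_size, args.random_seed, plus args.num_cores and args.simple in Example #12); the defaults are illustrative guesses, not taken from the original repository, except latent_dim=292, which recurs throughout these examples.

import argparse

def get_arguments():
    # Hypothetical reconstruction: only the flags the snippets actually use.
    parser = argparse.ArgumentParser(description='Molecular VAE utilities')
    parser.add_argument('data', help='HDF5 file with preprocessed SMILES data')
    parser.add_argument('model', help='path for loading/saving model weights')
    parser.add_argument('--latent_dim', type=int, default=292)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--random_seed', type=int, default=1337)
    parser.add_argument('--num_cores', type=int, default=-1)
    parser.add_argument('--simple', action='store_true')
    return parser.parse_args()
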
Example #2
def main():
    np.random.seed(RANDOM_SEED)

    data_train, data_test, charset = load_dataset('data/processed.h5')
    print("Charset", charset)
    model = MoleculeVAE()
    model.create(charset, latent_rep_size=292)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)
    checkpointer = ModelCheckpoint(filepath='model.h5',
                                   verbose=1,
                                   save_best_only=True)

    history = model.autoencoder.fit(data_train[:1000],
                                    data_train[:1000],
                                    shuffle=True,
                                    epochs=NUM_EPOCHS,
                                    batch_size=100,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test[:1000],
                                                     data_test[:1000]))
    with open('trainHistoryDict', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)
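
Because Example #2 pickles history.history, the learning curves can be recovered later. A minimal sketch, assuming the file was written as above; Keras records a 'val_loss' key because validation_data was passed to fit:

import pickle

# Reload the history dict saved by Example #2 and inspect validation loss
with open('trainHistoryDict', 'rb') as f:
    history = pickle.load(f)
print(history['val_loss'])
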
Example #3
def main():
    args = get_arguments()
    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    model.autoencoder.fit(data_train,
                          data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
Example #4
def decoder(model):
    latent_dim = 292
    data_train, data_test, charset = load_dataset('data/processed.h5')

    # Raise an exception if the model file does not exist
    if os.path.isfile('model.h5'):
        model.load(charset, 'model.h5', latent_rep_size=latent_dim)
        print("model loaded")
    else:
        raise ValueError("Model file doesn't exist")

    samples_all = []
    with open('encoded_vec.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        rows = [row for row in reader]
    data = np.array(rows, dtype=float)

    # Decode each latent vector back into a SMILES string
    for ix in range(len(data)):
        sampled = model.decoder.predict(
            data[ix].reshape(-1, latent_dim)).argmax(axis=2)[0]
        sampled = decode_smiles_from_indexes(sampled, charset)
        print(sampled)
        samples_all.append(sampled)

    # Wrap each SMILES string in a list so csv.writer emits one string per
    # row instead of splitting it into individual characters
    with open('decoded_vec.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerows([[s] for s in samples_all])
Example #5
def encoder(model):
    #latent_dim = args.latent_dim
    latent_dim = 292
    data_train, data_test, charset = load_dataset('data/processed.h5')
    print(np.shape(data_train))

    if os.path.isfile('model.h5'):
        model.load(charset, 'model.h5', latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file doesn't exist")

    print(model.encoder.predict(data_train[0:2]))
    m = model.encoder.predict(data_train[0:100])
    print(np.shape(m))
    with open('encoded_vec.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerows(m)
    '''true_pred_gen = (((mat, weight, model.encoder.predict(mat))
                      for (mat, _, weight) in train_gen))

    h5f = h5py.File('encoded.h5', 'w')
    h5f.create_dataset('charset', data = charset)
    h5f.create_dataset('latent_vectors', (NUM_SAMPLED, 120, latent_dim))
    for ix in range(NUM_SAMPLED):
      _, _, x_latent = true_pred_gen.next()
      print(x_latent[0])
      h5f['latent_vectors'][ix] = x_latent[0]
    h5f.close()'''
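
Examples #4 and #5 are two halves of a round trip through encoded_vec.csv. A minimal driver sketch, assuming the module-level imports the two functions rely on (os, csv, numpy, and the molecules.utils helpers) are already in place:

from molecules.model import MoleculeVAE

model = MoleculeVAE()
encoder(model)  # Example #5: writes the first 100 latent vectors to encoded_vec.csv
decoder(model)  # Example #4: decodes them back to SMILES in decoded_vec.csv
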
Example #6
def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
    
    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        model.create(charset, latent_rep_size = args.latent_dim)

    checkpointer = ModelCheckpoint(filepath = args.model,
                                   verbose = 1,
                                   save_best_only = True)

    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                                  factor = 0.2,
                                  patience = 3,
                                  min_lr = 0.0001)

    model.autoencoder.fit(
        data_train,
        data_train,
        shuffle = True,
        epochs = args.epochs,
        batch_size = args.batch_size,
        callbacks = [checkpointer, reduce_lr],
        validation_data = (data_test, data_test)
    )
Example #7
def visualize_model(args):
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    plot(model.autoencoder, to_file = args.outfile)
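
The plot helper used here appears to come from the Keras 1 era (keras.utils.visualize_util). In Keras 2 the same diagram is produced by plot_model; a sketch, with the output filename purely illustrative:

from keras.utils import plot_model

# Keras 2 replacement for the old `plot` call above
plot_model(model.autoencoder, to_file='model.png', show_shapes=True)
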
Example #8
def visualize_model(args):
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    plot(model.autoencoder, to_file = args.outfile)
Example #9
def autoencoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    sampled = model.autoencoder.predict(data[0].reshape(1, 120, len(charset))).argmax(axis=2)[0]
    mol = decode_smiles_from_indexes(map(from_one_hot_array, data[0]), charset)
    sampled = decode_smiles_from_indexes(sampled, charset)
    print(mol)
    print(sampled)
Example #10
def autoencoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    sampled = model.autoencoder.predict(data[0].reshape(
        1, 120, len(charset))).argmax(axis=2)[0]
    mol = decode_smiles_from_indexes(map(from_one_hot_array, data[0]), charset)
    sampled = decode_smiles_from_indexes(sampled, charset)
    print(mol)
    print(sampled)
Example #11
def encoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data=charset)
        h5f.create_dataset('latent_vectors', data=x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter='\t')
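
Reading the saved vectors back mirrors the write. A minimal sketch, where 'latent.h5' stands in for whatever path was passed as --save_h5:

import h5py

# Load the charset and latent vectors saved by encoder() above
with h5py.File('latent.h5', 'r') as h5f:
    charset = h5f['charset'][:]
    x_latent = h5f['latent_vectors'][:]
print(x_latent.shape)
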
Example #12
def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE, SimpleMoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    if args.num_cores != -1:
        # Pin TensorFlow to a fixed number of CPU cores (TF1-style session setup)
        config = tf.ConfigProto(intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1,
                                allow_soft_placement=True,
                                device_count={'CPU': args.num_cores})
        session = tf.Session(config=config)
        K.set_session(session)

    data_train, data_test, charset = load_dataset(args.data)

    if args.simple:
        model = SimpleMoleculeVAE()
    else:
        model = MoleculeVAE()

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # plot_model(model, to_file='model.png')
    history = model.autoencoder.fit(data_train,
                                    data_train,
                                    shuffle=True,
                                    epochs=args.epochs,
                                    batch_size=args.batch_size,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test, data_test))
    with open('history.p', 'wb') as f:
        cPickle.dump(history.history, f)
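
The ConfigProto/Session block in Example #12 is TF1-only. Under TensorFlow 2 the rough equivalent of those settings is the tf.config API, with no Session object involved; a sketch:

import tensorflow as tf

# TF2 counterparts of the intra/inter thread and soft-placement settings above
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.set_soft_device_placement(True)
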
Example #13
def encoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data = charset)
        h5f.create_dataset('latent_vectors', data = x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter = '\t')
Example #14
def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter = '\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data = charset)
            h5f.create_dataset('latent_vectors', data = x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
Example #15
def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter='\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data=charset)
            h5f.create_dataset('latent_vectors', data=x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
Example #16
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'
import h5py
import numpy as np
from molecules.model import MoleculeVAE
from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
    decode_smiles_from_indexes, load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

NUM_EPOCHS = 100
BATCH_SIZE = 10
LATENT_DIM = 128
RANDOM_SEED = 123

np.random.seed(RANDOM_SEED)

data_train, data_test, charset = load_dataset('./data/processed.h5')
model = MoleculeVAE()
#model.load(charset, args.model, latent_rep_size = args.latent_dim)
model.create(charset, latent_rep_size=LATENT_DIM)

checkpointer = ModelCheckpoint(
    filepath='./test_models/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2,
                              patience=3,
                              min_lr=0.0001)

data_train = data_train[:1]
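
Example #16 breaks off after shrinking the training set to a single sample. Judging purely by the fit calls in Examples #1 and #2, the snippet would plausibly continue along these lines (a sketch, not the original code):

model.autoencoder.fit(data_train,
                      data_train,
                      shuffle=True,
                      epochs=NUM_EPOCHS,
                      batch_size=BATCH_SIZE,
                      callbacks=[checkpointer, reduce_lr],
                      validation_data=(data_test, data_test))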