def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model, verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    model.autoencoder.fit(data_train, data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
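# Hypothetical sketch of the get_arguments() helper assumed by the training entry
# points in this file; the flag names mirror the attributes referenced above
# (data, model, latent_dim, epochs, batch_size, random_seed). The defaults shown
# are illustrative assumptions, not confirmed by this code.
import argparse

def get_arguments():
    parser = argparse.ArgumentParser(description='Train the molecular VAE.')
    parser.add_argument('data', help='HDF5 file with the preprocessed SMILES dataset.')
    parser.add_argument('model', help='Path used to load/save model weights.')
    parser.add_argument('--latent_dim', type=int, default=292, help='Size of the latent representation.')
    parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs.')
    parser.add_argument('--batch_size', type=int, default=100, help='Training batch size.')
    parser.add_argument('--random_seed', type=int, default=123, help='Seed for the NumPy RNG.')
    return parser.parse_args()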
def main():
    np.random.seed(RANDOM_SEED)

    data_train, data_test, charset = load_dataset('data/processed.h5')
    print("Charset", charset)

    model = MoleculeVAE()
    model.create(charset, latent_rep_size=292)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)
    checkpointer = ModelCheckpoint(filepath='model.h5', verbose=1, save_best_only=True)

    # Train and validate on the first 1000 samples only (quick smoke test).
    # `nb_epoch` is the Keras 1.x keyword; Keras 2 renamed it to `epochs`.
    history = model.autoencoder.fit(data_train[:1000], data_train[:1000],
                                    shuffle=True,
                                    nb_epoch=NUM_EPOCHS,
                                    batch_size=100,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test[:1000], data_test[:1000]))

    # Persist the per-epoch loss curves for later inspection.
    with open('trainHistoryDict', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)
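# Minimal sketch of how the pickled history might be inspected afterwards; it
# assumes only that the Keras History.history dict (which contains 'loss' and
# 'val_loss' when validation data is supplied) was dumped to 'trainHistoryDict'
# as in main() above.
import pickle

with open('trainHistoryDict', 'rb') as f:
    history = pickle.load(f)
print(history['loss'][-1], history['val_loss'][-1])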
def main():
    args = get_arguments()

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model, verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    model.autoencoder.fit(data_train, data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
def decoder(model):
    latent_dim = 292
    data_train, data_test, charset = load_dataset('data/processed.h5')

    # Here we directly raise an exception for a non-existent model file.
    if os.path.isfile('model.h5'):
        model.load(charset, 'model.h5', latent_rep_size=latent_dim)
        print("model loaded")
    else:
        raise ValueError("Model file doesn't exist")

    samples_all = []
    with open('encoded_vec.csv', 'r') as csvfile:  # good dataset/data2.csv
        reader = csv.reader(csvfile)
        rows = [row for row in reader]
        data = np.array(rows, dtype=float)

    for ix in range(len(data)):
        # Decode one latent vector at a time back to index sequences.
        sampled = model.decoder.predict(data[ix].reshape(-1, 292)).argmax(axis=2)[0]
        # sampled = data_test[ix].argmax(axis=1)
        print(sampled)
        # print(np.shape(sampled))
        sampled = decode_smiles_from_indexes(sampled, charset)
        print(np.shape(sampled))
        samples_all.append(sampled)
        print(sampled)

    with open('decoded_vec.csv', 'w') as f:
        writer = csv.writer(f)
        # One decoded SMILES string per row; passing bare strings to writerows
        # would split each SMILES into individual characters.
        writer.writerows([[smiles] for smiles in samples_all])
def encoder(model):
    # latent_dim = args.latent_dim
    latent_dim = 292
    data_train, data_test, charset = load_dataset('data/processed.h5')
    print(np.shape(data_train))

    if os.path.isfile('model.h5'):
        model.load(charset, 'model.h5', latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file doesn't exist")

    print(model.encoder.predict(data_train[0:2]))

    # Encode the first 100 training molecules and dump the latent vectors to CSV.
    m = model.encoder.predict(data_train[0:100])
    print(np.shape(m))
    with open('encoded_vec.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerows(m)

    # Legacy generator-based HDF5 export, kept for reference:
    '''true_pred_gen = (((mat, weight, model.encoder.predict(mat)) for (mat, _, weight) in train_gen))
    h5f = h5py.File('encoded.h5', 'w')
    h5f.create_dataset('charset', data=charset)
    h5f.create_dataset('latent_vectors', (NUM_SAMPLED, 120, latent_dim))
    for ix in range(NUM_SAMPLED):
        _, _, x_latent = true_pred_gen.next()
        print(x_latent[0])
        h5f['latent_vectors'][ix] = x_latent[0]
    h5f.close()'''
def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        model.create(charset, latent_rep_size = args.latent_dim)

    checkpointer = ModelCheckpoint(filepath = args.model,
                                   verbose = 1,
                                   save_best_only = True)

    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                                  factor = 0.2,
                                  patience = 3,
                                  min_lr = 0.0001)

    model.autoencoder.fit(
        data_train,
        data_train,
        shuffle = True,
        nb_epoch = args.epochs,
        batch_size = args.batch_size,
        callbacks = [checkpointer, reduce_lr],
        validation_data = (data_test, data_test)
    )
def visualize_model(args):
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    # `plot` is the Keras 1.x graph-plotting helper (keras.utils.visualize_util);
    # Keras 2 exposes the equivalent as keras.utils.plot_model.
    plot(model.autoencoder, to_file = args.outfile)
def autoencoder(args, model):
    latent_dim = args.latent_dim

    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    sampled = model.autoencoder.predict(data[0].reshape(1, 120, len(charset))).argmax(axis=2)[0]
    mol = decode_smiles_from_indexes(map(from_one_hot_array, data[0]), charset)
    sampled = decode_smiles_from_indexes(sampled, charset)
    print(mol)
    print(sampled)
def encoder(args, model):
    latent_dim = args.latent_dim

    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data=charset)
        h5f.create_dataset('latent_vectors', data=x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter='\t')
def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE, SimpleMoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    if args.num_cores != -1:
        config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1,
                                allow_soft_placement=True, device_count={'CPU': args.num_cores})
        session = tf.Session(config=config)
        K.set_session(session)

    data_train, data_test, charset = load_dataset(args.data)

    if args.simple:
        model = SimpleMoleculeVAE()
    else:
        model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model, verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    # plot_model(model, to_file='model.png')

    history = model.autoencoder.fit(data_train, data_train,
                                    shuffle=True,
                                    epochs=args.epochs,
                                    batch_size=args.batch_size,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test, data_test))

    with open('history.p', 'wb') as f:
        cPickle.dump(history.history, f)
def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter = '\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data = charset)
            h5f.create_dataset('latent_vectors', data = x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
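# Minimal sketch of reading the encoded vectors back, assuming the HDF5 layout
# written above ('charset' and 'latent_vectors' datasets). The file name
# 'latent.h5' is a hypothetical stand-in for whatever was passed as save_h5.
import h5py

with h5py.File('latent.h5', 'r') as h5f:
    charset = h5f['charset'][:]
    x_latent = h5f['latent_vectors'][:]
print(x_latent.shape)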
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'

import h5py
import numpy as np

from molecules.model import MoleculeVAE
from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
    decode_smiles_from_indexes, load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

NUM_EPOCHS = 100
BATCH_SIZE = 10
LATENT_DIM = 128
RANDOM_SEED = 123

np.random.seed(RANDOM_SEED)  # args.random_seed

data_train, data_test, charset = load_dataset('./data/processed.h5')
model = MoleculeVAE()
# model.load(charset, args.model, latent_rep_size = args.latent_dim)
model.create(charset, latent_rep_size=LATENT_DIM)

checkpointer = ModelCheckpoint(
    filepath='./test_models/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

data_train = data_train[:1]
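# A minimal sketch of the training call, assuming the same fit() signature used by
# the other training snippets in this file; not part of the original script, which
# ends after truncating data_train above.
model.autoencoder.fit(data_train, data_train,
                      shuffle=True,
                      epochs=NUM_EPOCHS,
                      batch_size=BATCH_SIZE,
                      callbacks=[checkpointer, reduce_lr],
                      validation_data=(data_test, data_test))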