# Example 1
def main():
    """Train the molecular VAE, resuming from an existing checkpoint
    when one is found at ``args.model``, otherwise building a fresh model."""
    args = get_arguments()
    data_train, data_test, charset = load_dataset(args.data)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    training_callbacks = [
        # Persist only the best-scoring weights seen so far.
        ModelCheckpoint(filepath=args.model, verbose=1, save_best_only=True),
        # Shrink the learning rate when validation loss plateaus.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3,
                          min_lr=0.0001),
    ]

    # Autoencoder training: the input doubles as the target.
    # NOTE(review): `nb_epoch` is the Keras 1 spelling of `epochs` --
    # confirm the installed Keras version still accepts it.
    model.autoencoder.fit(data_train, data_train,
                          shuffle=True,
                          nb_epoch=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=training_callbacks,
                          validation_data=(data_test, data_test))
# Example 2
def visualize_model(args):
    """Render the autoencoder architecture diagram to ``args.outfile``.

    Raises ValueError when ``args.model`` does not exist on disk.
    """
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split=False)

    # Guard clause: a weights file is mandatory for this command.
    if not os.path.isfile(args.model):
        raise ValueError("Model file %s doesn't exist" % args.model)
    model.load(charset, args.model)

    plot(model.autoencoder, to_file=args.outfile)
# Example 3
def autoencoder(args, model):
    """Round-trip the first molecule through the autoencoder and print
    both the original and the reconstructed SMILES string."""
    data, charset = load_dataset(args.data, split=False)

    # Guard clause: refuse to run without a weights file.
    if not os.path.isfile(args.model):
        raise ValueError("Model file %s doesn't exist" % args.model)
    model.load(charset, args.model, latent_rep_size=args.latent_dim)

    # Reconstruct molecule 0: one-hot batch of shape (1, 120, len(charset))
    # in, per-position argmax over the character axis out.
    one_hot_batch = data[0].reshape(1, 120, len(charset))
    reconstructed = model.autoencoder.predict(one_hot_batch).argmax(axis=2)[0]

    original = decode_smiles_from_indexes(
        map(from_one_hot_array, data[0]), charset)
    print(original)
    print(decode_smiles_from_indexes(reconstructed, charset))
# Example 4
def main():
    """Encode the dataset into latent space, then either dump the vectors
    as tab-separated text on stdout or show a visualization."""
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    # A trained weights file is required before anything else can run.
    if not os.path.isfile(args.model):
        raise ValueError("Model file %s doesn't exist" % args.model)
    model.load(charset, args.model, latent_rep_size=args.latent_dim)

    if args.visualize:
        visualize_latent_rep(args, model, data)
    else:
        np.savetxt(sys.stdout, model.encoder.predict(data), delimiter='\t')
# Example 5
def encoder(args, model):
    """Project the dataset into the VAE latent space.

    Loads weights from ``args.model``, encodes every entry of ``args.data``,
    and either writes the latent matrix plus the charset to an HDF5 file
    (when ``args.save_h5`` is set) or dumps the matrix as tab-separated
    text on stdout.

    Raises:
        ValueError: if ``args.model`` does not exist on disk.
    """
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        # Context manager guarantees the HDF5 handle is closed even if a
        # create_dataset call raises (the original leaked it on error).
        with h5py.File(args.save_h5, 'w') as h5f:
            h5f.create_dataset('charset', data=charset)
            h5f.create_dataset('latent_vectors', data=x_latent)
    else:
        np.savetxt(sys.stdout, x_latent, delimiter='\t')
# Example 6
def main():
    """Reconstruction sanity check: decode molecule #100 of the canned
    one-hot dataset and print the true vs. reconstructed SMILES."""
    args = get_arguments()
    model = MoleculeVAE()

    data, charset = load_dataset('data/all_smiles_120_one_hot.h5', split=False)

    # Refuse to run without a trained weights file.
    if not os.path.isfile(args.model):
        raise ValueError("Model file %s doesn't exist" % args.model)
    model.load(charset, args.model)

    probe = 100  # fixed probe molecule
    batch = data[probe].reshape(1, 120, len(charset))
    reconstructed = model.autoencoder.predict(batch).argmax(axis=2)[0]

    original = decode_smiles_from_indexes(
        map(from_one_hot_array, data[probe]), charset)
    print(original)
    print(decode_smiles_from_indexes(reconstructed, charset))
# Example 7
def main():
    """Encode the dataset into latent vectors, then visualize them or
    persist them.

    Output selection: ``--visualize`` shows the latent representation;
    otherwise ``--save_h5`` writes an HDF5 file with the charset and the
    latent matrix, and with neither flag the matrix is dumped as
    tab-separated text on stdout.

    Raises:
        ValueError: if ``args.model`` does not exist on disk.
    """
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter='\t')
        else:
            # Context manager closes the HDF5 handle even on error
            # (the original leaked it if create_dataset raised).
            with h5py.File(args.save_h5, 'w') as h5f:
                h5f.create_dataset('charset', data=charset)
                h5f.create_dataset('latent_vectors', data=x_latent)
    else:
        visualize_latent_rep(args, model, x_latent)