type=str, default='result.txt') parser.add_argument('--smiles_data', help='smiles data', type=str) parser.add_argument('--vocab_from', help='the file where vocab is extracted from', type=str) parser.add_argument('--lr', help='learning rate', type=float, default=0.0001) args = parser.parse_args() #extact vocab and char char, vocab = extract_vocab(args.vocab_from, args.seq_length) vocab_size = len(char) #model and restore model parapmeters model = CVAE(vocab_size, args) model.restore(args.save_file) print('Number of parameters : ', np.sum([np.prod(v.shape) for v in tf.trainable_variables()])) #target property to numpy array start_codon = np.array( [np.array(list(map(vocab.get, 'X'))) for _ in range(args.batch_size)]) #generate smiles smiles = [] for _ in range(args.num_iteration): generated = model.sample(start_codon, args.seq_length) smiles += [ convert_to_smiles(generated[i], char) for i in range(len(generated)) ]
if not os.path.isdir(args.save_dir):
    os.mkdir(args.save_dir)

# Split the data 75/25 into training and test sets.
# BUG FIX: the original test slices used [num_train_data:-1], which silently
# drops the final sample from every test array; [num_train_data:] keeps it.
num_train_data = int(len(molecules_input) * 0.75)
train_molecules_input = molecules_input[:num_train_data]
test_molecules_input = molecules_input[num_train_data:]
train_molecules_output = molecules_output[:num_train_data]
test_molecules_output = molecules_output[num_train_data:]
train_length = length[:num_train_data]
test_length = length[num_train_data:]

# Build the model; optionally warm-start from a pretrained checkpoint.
model = CVAE(vocab_size, args)
if args.pretrained is not None:
    model.restore(args.pretrained)
print('Number of parameters : ',
      np.sum([np.prod(v.shape) for v in tf.trainable_variables()]))

for epoch in range(args.num_epochs):
    # Per-epoch timer (the original assigned st = time.time() twice in a row;
    # the redundant duplicate is removed).
    st = time.time()
    # Learning-rate scheduling (currently disabled):
    # model.assign_lr(learning_rate * (decay_rate ** epoch))
    train_loss = []
    test_loss = []
    for iteration in range(len(train_molecules_input) // args.batch_size):
        # Sample a random mini-batch of training indices (with replacement).
        n = np.random.randint(len(train_molecules_input), size=args.batch_size)