def main():
    """Parse a fixed sample program and describe the root of its parse tree.

    Loads the left-skewed program grammar, parses one hard-coded program,
    prints the first tree the parser returns, and then reports the root
    node's symbol and production rule together with the types of their parts.
    """
    grammar_path = '../../dropbox/context_free_grammars/prog_leftskew.grammar'
    program = 'v1=sin(v0);v2=v0*4;v3=v1/v2;v4=cos(v0);v5=v0*3;v6=sin(v1);v7=v3-v6;v8=v7+v5;v9=v8+v4;return:v9'

    grammar = parser.Grammar(grammar_path)
    root = parser.parse(program, grammar)[0]

    print('(ugly) tree:')
    print(root)
    print()

    print('for root:')
    symbol_template = 'symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)'
    symbol_fields = (
        root.symbol,
        isinstance(root, parser.Nonterminal),
        root.symbol.symbol(),
        type(root.symbol.symbol()),
    )
    print(symbol_template % symbol_fields)

    rule_template = (
        'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
        'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
    )
    rule_fields = (root.rule, root.rule.lhs(), type(root.rule.lhs()), root.rule.rhs())
    print(rule_template % rule_fields)
def encode(self, chunk, use_random=False):
    """
    Encode a batch of molecules into latent mean vectors.

    Args:
        chunk: a non-empty list of `n` items. Either SMILES strings, which
            are parsed with `self.grammar`, or already-parsed trees, which
            are used as they are. The type of the first item decides.
        use_random: not used by this method.

    Returns:
        A numpy array of dtype np.float32, of shape (n, latent_dim)

    Note:
        Each row should be the *mean* of the latent space distribution
        rather than a sampled point from that distribution.
        (It can be anything as long as it fits what self.decode expects)
    """
    # Parse only when needed, and only once: strings go through parse(),
    # pre-parsed trees are passed through untouched.
    if isinstance(chunk[0], str):
        cfg_tree_list = parse(chunk, self.grammar)
    else:
        cfg_tree_list = chunk

    onehot, _ = batch_make_att_masks(cfg_tree_list, self.tree_decoder, self.onehot_walker, dtype=np.float32)

    # The encoder is fed the one-hot batch with its last two axes swapped.
    x_inputs = np.transpose(onehot, [0, 2, 1])
    x_inputs = paddle.to_tensor(x_inputs)

    z_mean, _ = self.ae.encoder(x_inputs)
    return z_mean.numpy()
def save_map(self, n=None, notify=True):
    """Write the current map state to ``saves/<name>/main.pkl``.

    Args:
        n: name of the map. When truthy it is stored in ``self._NAME``;
            otherwise the name already held in ``self._NAME`` is used.
        notify: when true, report the save through ``self.notify.msg``.
    """
    if n:
        self._NAME = n
    else:
        n = self._NAME

    # create world folder if needed; makedirs also creates the parent
    # "saves" directory, which a plain mkdir would fail on
    p = os.path.join("saves", n)
    if not os.path.exists(p):
        os.makedirs(p)

    # construct structure to save (tiles are copied with a shallow slice)
    data_struct = {
        "map": self.tiles[:],
        "width": self.w,
        "height": self.h,
        "sun_pos": self.sun_pos,
        "diagnostics": self._DIAGNOSTIC,
        "inventory": self.inventory.slots,
    }

    # run the structure through cfg_parser.parse and write the result out
    ps = cfg_parser.parse(data_struct)
    with open(os.path.join(p, "main.pkl"), "w") as f:
        f.write(ps)

    if notify:
        self.notify.msg("Saved", "Map '" + n + "' has been saved.")
    # parenthesised form prints the same text on Python 2 and Python 3
    print("saved")
def main():
    """Demonstrate the parser on one hard-coded program.

    Builds the grammar from the left-skewed program grammar file, parses a
    sample program, prints the first resulting tree, and prints the symbol
    and rule attached to that tree's root.
    """
    grammar = parser.Grammar('../../dropbox/context_free_grammars/prog_leftskew.grammar')
    sample = 'v1=sin(v0);v2=v0*4;v3=v1/v2;v4=cos(v0);v5=v0*3;v6=sin(v1);v7=v3-v6;v8=v7+v5;v9=v8+v4;return:v9'
    trees = parser.parse(sample, grammar)
    top = trees[0]

    # Raw dump of the tree first.
    print('(ugly) tree:')
    print(top)
    print()

    # Then the details of the root node.
    print('for root:')
    symbol_report = 'symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)' % (
        top.symbol,
        isinstance(top, parser.Nonterminal),
        top.symbol.symbol(),
        type(top.symbol.symbol()),
    )
    print(symbol_report)

    rule_report = (
        'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
        'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
    ) % (
        top.rule,
        top.rule.lhs(),
        type(top.rule.lhs()),
        top.rule.rhs(),
    )
    print(rule_report)
def save_map(self, n=None, notify=True):
    """Save the current map state under ``saves/<name>/main.pkl``.

    Args:
        n: map name. A truthy value replaces ``self._NAME``; a falsy value
            means the existing ``self._NAME`` is used instead.
        notify: when true, announce the save through ``self.notify.msg``.
    """
    if n:
        self._NAME = n
    else:
        n = self._NAME

    # create world folder if needed (including a missing "saves" parent,
    # which os.mkdir alone cannot create)
    p = os.path.join("saves", n)
    if not os.path.exists(p):
        os.makedirs(p)

    # construct structure to save; the tile list is shallow-copied
    t = self.tiles[:]
    data_struct = {
        "map": t,
        "width": self.w,
        "height": self.h,
        "sun_pos": self.sun_pos,
        "diagnostics": self._DIAGNOSTIC,
        "inventory": self.inventory.slots,
    }

    # pass the structure through cfg_parser.parse, then save the result
    ps = cfg_parser.parse(data_struct)
    with open(os.path.join(p, "main.pkl"), "w") as f:
        f.write(ps)

    if notify:
        self.notify.msg("Saved", "Map '" + n + "' has been saved.")
    # function-call form is valid on both Python 2 and Python 3
    print("saved")
def parse_single(smiles, grammar):
    """
    Parse one SMILES string and wrap the result with AnnotatedTree2MolTree.

    The parser must return a list holding exactly one tree; this is
    asserted before the tree is converted and returned.
    """
    parse_trees = parser.parse(smiles, grammar)
    assert isinstance(parse_trees, list) and len(parse_trees) == 1
    return AnnotatedTree2MolTree(parse_trees[0])
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse every line of ``smiles_file`` with the grammar in ``grammar_file``.

    Each line is stripped, parsed, checked to yield exactly one tree, and
    passed to AnnotatedTree2MolTree. Returns the converted trees as a list
    in file order. Progress is reported through tqdm.
    """
    grammar = parser.Grammar(grammar_file)

    def to_mol_tree(line):
        # One stripped line -> one converted tree.
        parsed = parser.parse(line.strip(), grammar)
        assert isinstance(parsed, list) and len(parsed) == 1
        return AnnotatedTree2MolTree(parsed[0])

    with open(smiles_file, 'r') as handle:
        return [to_mol_tree(line) for line in tqdm(handle)]
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse every line of ``smiles_file`` into a list of converted trees.

    Args:
        smiles_file: path of a text file read line by line; each line is
            stripped before parsing.
        grammar_file: path of the grammar to load. When None, the path in
            ``cmd_args.grammar_file`` is used instead.

    Returns:
        A list with one AnnotatedTree2MolTree result per line, in file order.

    Raises:
        AssertionError: if the parser does not return a list containing
            exactly one tree for some line.
    """
    # Honour the caller's grammar; fall back to the command-line one only
    # when no path was given.
    if grammar_file is None:
        grammar_file = cmd_args.grammar_file
    grammar = parser.Grammar(grammar_file)

    cfg_tree_list = []
    with open(smiles_file, 'r') as f:
        for row in tqdm(f):
            smiles = row.strip()
            ts = parser.parse(smiles, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            cfg_tree_list.append(AnnotatedTree2MolTree(ts[0]))
    return cfg_tree_list
def process_chunk(program_list):
    """Turn a list of programs into one-hot encodings and masks.

    Every program is parsed with the grammar named by
    ``cmd_args.grammar_file``, checked to yield exactly one tree, and
    converted with AnnotatedTree2ProgTree. The converted trees are then
    handed to batch_make_att_masks together with a fresh
    ProgramOnehotBuilder and ProgTreeDecoder.

    Returns:
        The tuple ``(onehot, masks)`` produced by batch_make_att_masks
        (requested with dtype np.byte).
    """
    grammar = parser.Grammar(cmd_args.grammar_file)

    prog_trees = []
    for source in program_list:
        parsed = parser.parse(source, grammar)
        assert isinstance(parsed, list) and len(parsed) == 1
        prog_trees.append(AnnotatedTree2ProgTree(parsed[0]))

    onehot_walker = ProgramOnehotBuilder()
    decoder = ProgTreeDecoder()
    onehot, masks = batch_make_att_masks(prog_trees, decoder, onehot_walker, dtype=np.byte)
    return (onehot, masks)
def process_chunk(smiles_list):
    """Turn a list of SMILES strings into one-hot encodings and masks.

    Every string is parsed with the grammar named by
    ``cmd_args.grammar_file``, checked to yield exactly one tree, and
    converted with AnnotatedTree2MolTree. The converted trees are then
    handed to batch_make_att_masks together with a fresh OnehotBuilder and
    the decoder returned by create_tree_decoder.

    Returns:
        The tuple ``(onehot, masks)`` produced by batch_make_att_masks
        (requested with dtype np.byte).
    """
    grammar = parser.Grammar(cmd_args.grammar_file)

    mol_trees = []
    for text in smiles_list:
        parsed = parser.parse(text, grammar)
        assert isinstance(parsed, list) and len(parsed) == 1
        mol_trees.append(AnnotatedTree2MolTree(parsed[0]))

    onehot_walker = OnehotBuilder()
    decoder = create_tree_decoder()
    onehot, masks = batch_make_att_masks(mol_trees, decoder, onehot_walker, dtype=np.byte)
    return (onehot, masks)
def parse_smiles_with_cfg(smiles_file, grammar_file):
    """Parse every line of ``smiles_file`` and verify each tree round-trips.

    Args:
        smiles_file: path of a text file read line by line; each line is
            stripped before parsing.
        grammar_file: path of the grammar to load. When None, the path in
            ``cmd_args.grammar_file`` is used instead.

    Returns:
        A tuple ``(smiles_list, cfg_tree_list, annotated_trees)`` of three
        parallel lists: the stripped input strings, the
        AnnotatedTree2MolTree results, and the raw trees from the parser.

    Raises:
        AssertionError: if the parser does not return a list containing
            exactly one tree for a line, or if get_smiles_from_tree on the
            converted tree does not reproduce the input string.
    """
    # Honour the caller's grammar; fall back to the command-line one only
    # when no path was given.
    if grammar_file is None:
        grammar_file = cmd_args.grammar_file
    grammar = parser.Grammar(grammar_file)

    smiles_list = []
    cfg_tree_list = []
    annotated_trees = []
    with open(smiles_file, 'r') as f:
        for row in tqdm(f):
            smiles = row.strip()
            smiles_list.append(smiles)

            ts = parser.parse(smiles, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            annotated_trees.append(ts[0])

            n = AnnotatedTree2MolTree(ts[0])
            cfg_tree_list.append(n)

            # Sanity check: the converted tree must spell the original string.
            st = get_smiles_from_tree(n)
            assert st == smiles
    return (smiles_list, cfg_tree_list, annotated_trees)
for c in node.children: dfs_indices(c, result) def AnnotatedTree2RuleIndices(annotated_root): result = [] dfs_indices(annotated_root, result) return np.array(result) def AnnotatedTree2Onehot(annotated_root, max_len): cur_indices = AnnotatedTree2RuleIndices(annotated_root) assert len(cur_indices) <= max_len x_cpu = np.zeros((DECISION_DIM, max_len), dtype=np.float32) x_cpu[cur_indices, np.arange(len(cur_indices))] = 1.0 x_cpu[-1, np.arange(len(cur_indices), max_len)] = 1.0 # padding return x_cpu if __name__ == '__main__': smiles = 'OSC' grammar = cfg_parser.Grammar(cmd_args.grammar_file) ts = cfg_parser.parse(smiles, grammar) assert isinstance(ts, list) and len(ts) == 1 print(AnnotatedTree2RuleIndices(ts[0]))
def parse(chunk, grammar):
    """Parse ``chunk`` in parallel and return the results as one flat list.

    The input is cut into slices of 100 items; each slice is handed to
    ``parse_many`` as a separate joblib task (``n_jobs=-1``), and the
    per-slice results are concatenated in slice order.
    """
    size = 100
    result_list = Parallel(n_jobs=-1)(
        delayed(parse_many)(chunk[i: i + size], grammar)
        for i in range(0, len(chunk), size)
    )
    return [tree for batch in result_list for tree in batch]


try:
    import cPickle as cp
except ImportError:
    # Python 3 has no cPickle; the pickle module offers the same API.
    import pickle as cp
from tqdm import tqdm


if __name__ == '__main__':
    smiles_file = cmd_args.smiles_file
    # Output sits next to the input: last extension replaced by ".cfg_dump".
    fname = '.'.join(smiles_file.split('.')[0:-1]) + '.cfg_dump'

    grammar = parser.Grammar(cmd_args.grammar_file)

    with open(smiles_file, 'r') as f:
        smiles = [line.strip() for line in f]

    # The output is opened only after the grammar and input loaded, so a
    # failure there no longer leaves a truncated dump behind, and the
    # `with` block closes the file even if parsing raises.
    with open(fname, 'wb') as fout:
        # Trees are pickled one after another into the same file, not as a
        # single list (the batch alternative would dump parse(smiles, grammar)).
        for s in tqdm(smiles):
            ts = parser.parse(s, grammar)
            assert isinstance(ts, list) and len(ts) == 1
            n = AnnotatedTree2MolTree(ts[0])
            cp.dump(n, fout, cp.HIGHEST_PROTOCOL)
flatten = h3.view(x_cpu.shape[0], -1) h = self.w1(flatten) h = F.relu(h) z_mean = self.mean_w(h) z_log_var = self.log_var_w(h) return (z_mean, z_log_var) if __name__ == '__main__': smiles_list = ['N\SCPP#IOS', 'CP\P', 'PINI'] cfg_trees = [] cfg_onehots = [] grammar = parser.Grammar(cmd_args.grammar_file) for smiles in smiles_list: ts = parser.parse(smiles, grammar) assert isinstance(ts, list) and len(ts) == 1 n = AnnotatedTree2MolTree(ts[0]) cfg_trees.append(n) cfg_onehots.append(AnnotatedTree2Onehot(ts[0], 50)) cfg_onehots = np.stack(cfg_onehots, axis=0) encoder = CNNEncoder(max_len=50, latent_dim=64) if cmd_args.mode == 'gpu': encoder.cuda() z = encoder(cfg_onehots) print(z[0].size())
result.append(idx) for c in node.children: dfs_indices(c, result) def AnnotatedTree2RuleIndices(annotated_root): result = [] dfs_indices(annotated_root, result) return np.array(result) def AnnotatedTree2Onehot(annotated_root, max_len): cur_indices = AnnotatedTree2RuleIndices(annotated_root) assert len(cur_indices) <= max_len x_cpu = np.zeros(( DECISION_DIM, max_len ), dtype=np.float32) x_cpu[cur_indices, np.arange(len(cur_indices))] = 1.0 x_cpu[-1, np.arange(len(cur_indices), max_len)] = 1.0 # padding return x_cpu if __name__ == '__main__': smiles = 'OSC' grammar = cfg_parser.Grammar(cmd_args.grammar_file) ts = cfg_parser.parse(smiles, grammar) assert isinstance(ts, list) and len(ts) == 1 print(AnnotatedTree2RuleIndices(ts[0]))
def parse_single(program, grammar):
    """Parse one program and wrap the result with AnnotatedTree2ProgTree.

    The parser must return a list holding exactly one tree; this is
    asserted before the tree is converted and returned.
    """
    parse_trees = parser.parse(program, grammar)
    assert isinstance(parse_trees, list) and len(parse_trees) == 1
    return AnnotatedTree2ProgTree(parse_trees[0])
for _0 in result_list: for _1 in _0: return_value.append(_1) return return_value if __name__ == '__main__': smiles_file = cmd_args.smiles_file fname = '.'.join(smiles_file.split('.')[0:-1]) + '.cfg_dump' fout = open(fname, 'wb') grammar = parser.Grammar(cmd_args.grammar_file) # load smiles strings as a list with open(smiles_file, 'r') as f: smiles = f.readlines() for i in range(len(smiles)): smiles[i] = smiles[i].strip() # cfg_tree_list = parse(smiles, grammar) # cp.dump(cfg_tree_list, fout, cp.HIGHEST_PROTOCOL) for i in tqdm(range(len(smiles))): ts = parser.parse(smiles[i], grammar) assert isinstance(ts, list) and len(ts) == 1 n = AnnotatedTree2MolTree(ts[0]) pickle.dump(n, fout, pickle.HIGHEST_PROTOCOL) fout.close()
h = self.w1(flatten) h = F.relu(h) z_mean = self.mean_w(h) z_log_var = self.log_var_w(h) return (z_mean, z_log_var) if __name__ == '__main__': smiles_list = ['N\SCPP#IOS', 'CP\P', 'PINI'] cfg_trees = [] cfg_onehots = [] grammar = parser.Grammar(cmd_args.grammar_file) for smiles in smiles_list: ts = parser.parse(smiles, grammar) assert isinstance(ts, list) and len(ts) == 1 n = AnnotatedTree2MolTree(ts[0]) cfg_trees.append(n) cfg_onehots.append(AnnotatedTree2Onehot(ts[0], 50)) cfg_onehots = np.stack(cfg_onehots, axis=0) encoder = CNNEncoder(max_len=50, latent_dim=64) if cmd_args.mode == 'gpu': encoder.cuda() z = encoder(cfg_onehots) print(z[0].size())
#!/usr/bin/env python3
# Demo script: parse one sample string with mol_zinc.grammar, print the first
# resulting tree, then print the symbol and rule attached to its root.
import cfg_parser as parser

info_folder = '../../dropbox/context_free_grammars'
grammar = parser.Grammar(info_folder + '/mol_zinc.grammar')

ts = parser.parse('ClI=I=S(CBI)(-CN(C-N(N-C-F))I(S-I)C-C=I)', grammar)
t = ts[0]

# Raw dump of the tree.
print('(ugly) tree:')
print(t)
print()

# Details of the root node: its symbol first, then its production rule.
print('for root:')
symbol_fields = (
    t.symbol,
    isinstance(t, parser.Nonterminal),
    t.symbol.symbol(),
    type(t.symbol.symbol()),
)
print('symbol is %s, is it non-terminal = %s, it\' value is %s (of type %s)' % symbol_fields)

rule_fields = (t.rule, t.rule.lhs(), type(t.rule.lhs()), t.rule.rhs())
print(
    'rule is %s, its left side is %s (of type %s), its right side is %s, a tuple '
    'which each element can be either str (for terminal) or Nonterminal (for nonterminal)'
    % rule_fields
)
def parse_single(smiles, grammar):
    """Parse a single SMILES string and return its AnnotatedTree2MolTree form.

    Asserts that the parser produced a list with exactly one tree, then
    converts that tree.
    """
    candidates = parser.parse(smiles, grammar)
    assert isinstance(candidates, list) and len(candidates) == 1
    only_tree = candidates[0]
    return AnnotatedTree2MolTree(only_tree)