def _load_softmax(self, vocab: C2I, full_vocab_path: str, ckpt_dir: str) -> None:
    from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

    with open(full_vocab_path) as f:
        full_vocab: List[str] = f.read().strip().split('\n')

    bias_reader = NewCheckpointReader(os.path.join(ckpt_dir, 'ckpt-softmax8'))
    full_bias = bias_reader.get_tensor('softmax/b')

    # SoftMax is chunked into 8 arrays of size 100000x1024
    for i in range(8):
        sm_reader = NewCheckpointReader(os.path.join(ckpt_dir, f'ckpt-softmax{i}'))

        sm_chunk = sm_reader.get_tensor(f'softmax/W_{i}').astype(np.float32)
        bias_chunk = full_bias[i:len(full_bias):8]
        vocab_chunk = full_vocab[i:len(full_bias):8]

        for j, w in enumerate(vocab_chunk):
            sm = sm_chunk[j]
            bias = bias_chunk[j]

            if w in vocab:
                self.decoder_w[vocab[w]] = sm
                self.decoder_b[vocab[w]] = bias

            if w == '</S>':
                self.decoder_w[vocab[vocab.eos_token]] = sm
                self.decoder_b[vocab[vocab.eos_token]] = bias
            elif w == '<UNK>':
                self.decoder_w[vocab[vocab.unk_token]] = sm
                self.decoder_b[vocab[vocab.unk_token]] = bias
def _load_softmax(self, vocab: C2I, full_vocab_path: str, ckpt_dir: str) -> None:
    from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

    with open(full_vocab_path) as f:
        full_vocab: List[str] = f.read().strip().split("\n")

    bias_reader = NewCheckpointReader(os.path.join(ckpt_dir, "ckpt-softmax8"))
    full_bias = torch.from_numpy(bias_reader.get_tensor("softmax/b"))

    # SoftMax is chunked into 8 arrays of size 100000x1024
    for i in range(8):
        sm_reader = NewCheckpointReader(os.path.join(ckpt_dir, f"ckpt-softmax{i}"))

        sm_chunk = torch.from_numpy(sm_reader.get_tensor(f"softmax/W_{i}"))
        bias_chunk = full_bias[i:full_bias.size(0):8]
        vocab_chunk = full_vocab[i:full_bias.size(0):8]

        for j, w in enumerate(vocab_chunk):
            sm = sm_chunk[j]
            bias = bias_chunk[j]

            if w in vocab:
                self.decoder_w[vocab[w]] = sm
                self.decoder_b[vocab[w]] = bias

            if w == "</S>":
                self.decoder_w[vocab[vocab.eos_token]] = sm
                self.decoder_b[vocab[vocab.eos_token]] = bias
            elif w == "<UNK>":
                self.decoder_w[vocab[vocab.unk_token]] = sm
                self.decoder_b[vocab[vocab.unk_token]] = bias
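Both _load_softmax variants lean on the same TF1 NewCheckpointReader API. A minimal sketch, assuming a hypothetical checkpoint path, of how such a checkpoint can be inspected before writing a loader; the reader calls are the ones the snippets above already use.

# Hypothetical path; get_variable_to_shape_map/get_tensor are the same
# calls used by the loaders above.
import os
from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

reader = NewCheckpointReader(os.path.join("ckpt_dir", "ckpt-softmax0"))
for name, shape in reader.get_variable_to_shape_map().items():
    print(name, shape)  # e.g. softmax/W_0 [100000, 1024]
w0 = reader.get_tensor("softmax/W_0")  # returned as a numpy.ndarray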
import os
import re

import tensorflow as tf
from tensorflow.python.pywrap_tensorflow import NewCheckpointReader


def restore(sess, global_vars):
    print('from resnet_v2_50 pretrained model')
    reader = NewCheckpointReader(
        os.path.join(os.getcwd(), 'model/resnet_v2_50.ckpt'))

    # restore both weights and biases from conv and shortcut layers
    restored_var_names = [
        name + ':0' for name in reader.get_variable_to_dtype_map().keys()
        if re.match('^.*weights$', name) or re.match('^.*biases$', name)
    ]
    restored_vars = [var for var in global_vars if var.name in restored_var_names]
    # strip the ':0' suffix again for checkpoint tensor lookup
    restored_var_names = [var.name[:-2] for var in restored_vars]

    # assign checkpoint values through a placeholder
    value_ph = tf.placeholder(dtype=tf.float32)
    for i in range(len(restored_var_names)):
        sess.run(tf.assign(restored_vars[i], value_ph),
                 feed_dict={value_ph: reader.get_tensor(restored_var_names[i])})

    # initialize every variable the checkpoint did not cover
    initialized_vars = [var for var in global_vars if var not in restored_vars]
    sess.run(tf.variables_initializer(initialized_vars))
import os
import re

import tensorflow as tf
from tensorflow.python.pywrap_tensorflow import NewCheckpointReader


def restore(sess, global_vars):
    print('from vgg_16 pretrained model')
    reader = NewCheckpointReader(os.path.join(os.getcwd(), 'model/vgg_16.ckpt'))

    # no batchnorm in the vgg_16 pretrained model; restore only conv
    # weights and skip the conv biases
    restored_var_names = [
        name + ':0' for name in reader.get_variable_to_dtype_map().keys()
        if re.match('^.*weights$', name)
    ]
    restored_vars = [var for var in global_vars if var.name in restored_var_names]
    restored_var_names = [var.name[:-2] for var in restored_vars]

    value_ph = tf.placeholder(dtype=tf.float32)
    for i in range(len(restored_var_names)):
        sess.run(tf.assign(restored_vars[i], value_ph),
                 feed_dict={value_ph: reader.get_tensor(restored_var_names[i])})

    initialized_vars = [var for var in global_vars if var not in restored_vars]
    sess.run(tf.variables_initializer(initialized_vars))
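The two restore functions above differ only in the checkpoint path and the variable-name regex, so the assign-through-placeholder pattern could be factored out. A sketch under that assumption; restore_from_ckpt is a hypothetical helper name, not part of either original.

def restore_from_ckpt(sess, global_vars, ckpt_path, pattern):
    # Select checkpoint variables whose names match the given regex.
    reader = NewCheckpointReader(ckpt_path)
    restored_var_names = [
        name + ':0' for name in reader.get_variable_to_dtype_map().keys()
        if re.match(pattern, name)
    ]
    restored_vars = [var for var in global_vars if var.name in restored_var_names]

    # Feeding values through a placeholder keeps the weights out of the
    # graph definition instead of baking them in as constants.
    value_ph = tf.placeholder(dtype=tf.float32)
    for var in restored_vars:
        sess.run(tf.assign(var, value_ph),
                 feed_dict={value_ph: reader.get_tensor(var.name[:-2])})

    # Initialize everything the checkpoint did not cover.
    sess.run(tf.variables_initializer(
        [var for var in global_vars if var not in restored_vars]))

# e.g. restore_from_ckpt(sess, tf.global_variables(),
#                        os.path.join(os.getcwd(), 'model/vgg_16.ckpt'),
#                        '^.*weights$')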
def _load_lstm(self, ckpt_dir: str, device: str) -> None:
    from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

    lstm_reader = NewCheckpointReader(os.path.join(ckpt_dir, "ckpt-lstm"))

    for l in range(self.num_layers):
        # Model weights are divided into 8 chunks
        # Shape: (2048, 32768)
        self.weight[l] = torch.cat(
            [
                torch.from_numpy(lstm_reader.get_tensor(f"lstm/lstm_{l}/W_{i}"))
                for i in range(8)
            ],
            dim=0,
        )

        # Shape: (32768,)
        self.bias[l] = torch.from_numpy(lstm_reader.get_tensor(f"lstm/lstm_{l}/B"))

        # Shape: (8192, 1024)
        self.weight_P[l] = torch.cat(
            [
                torch.from_numpy(lstm_reader.get_tensor(f"lstm/lstm_{l}/W_P_{i}"))
                for i in range(8)
            ],
            dim=0,
        )

        for p in ["f", "i", "o"]:
            # Shape: (8192,)
            self.peepholes[l, p] = torch.from_numpy(
                lstm_reader.get_tensor(f"lstm/lstm_{l}/W_{p.upper()}_diag"))

        # Cast to float32 tensors
        self.weight[l] = self.weight[l].to(config.DTYPE).to(device)
        self.weight_P[l] = self.weight_P[l].to(config.DTYPE).to(device)
        self.bias[l] = self.bias[l].to(config.DTYPE).to(device)
        for p in ["f", "i", "o"]:
            self.peepholes[l, p] = self.peepholes[l, p].to(config.DTYPE).to(device)
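The torch loader above concatenates eight checkpoint chunks per layer, then casts and moves the result. A minimal sketch of that pattern with scaled-down stand-in shapes (the real chunks concatenate to (2048, 32768) per the comment above; the stand-ins below are placeholders, not checkpoint reads).

import numpy as np
import torch

# Scaled-down stand-ins for the W_{i} checkpoint chunks.
chunks = [np.zeros((4, 16)) for _ in range(8)]

# torch.from_numpy shares memory with the NumPy array; the dtype cast
# below is what materializes an owned float32 copy, as in the loader.
weight = torch.cat([torch.from_numpy(c) for c in chunks], dim=0)
assert weight.shape == (32, 16)
weight = weight.to(torch.float32).to("cpu")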
def load_ckpt(self, pretrained):
    # restore the model from a checkpoint, a pretrained model, or init
    try:
        print('trying to restore last checkpoint')
        last_ckpt_path = tf.train.latest_checkpoint(checkpoint_dir=cfg.ckpt_dir)
        self.saver.restore(self.sess, save_path=last_ckpt_path)
        print('restored checkpoint from:', last_ckpt_path)
    except Exception:
        if self.is_training:
            print('init variables')
            restored_vars = []
            global_vars = tf.global_variables()

            if pretrained:  # restore from a tf-slim model
                if os.path.exists(os.path.join(cfg.workspace, premodel['path'])):
                    print('from ' + premodel['endp'])
                    import re
                    from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

                    reader = NewCheckpointReader(
                        os.path.join(cfg.workspace, premodel['ckpt']))

                    # only restore variables whose names match the
                    # pretrained model's pattern (e.g. conv weights)
                    restored_var_names = [
                        name + ':0'
                        for name in reader.get_variable_to_dtype_map().keys()
                        if re.match(premodel['rptn'], name)
                    ]

                    # keep only the variables present in both the graph
                    # and the pretrained checkpoint
                    restored_vars = [
                        var for var in global_vars
                        if var.name in restored_var_names
                    ]

                    # strip the ':0' suffix again for tensor lookup
                    restored_var_names = [var.name[:-2] for var in restored_vars]

                    # assign checkpoint values through a placeholder
                    value_ph = tf.placeholder(tf.float32, shape=None)
                    for i in range(len(restored_var_names)):
                        self.sess.run(
                            tf.assign(restored_vars[i], value_ph),
                            feed_dict={
                                value_ph: reader.get_tensor(restored_var_names[i])
                            })

            initialized_vars = list(set(global_vars) - set(restored_vars))
            self.sess.run(tf.variables_initializer(initialized_vars))
def _load_lstm(self, ckpt_dir: str) -> None:
    from tensorflow.python.pywrap_tensorflow import NewCheckpointReader

    lstm_reader = NewCheckpointReader(os.path.join(ckpt_dir, 'ckpt-lstm'))

    for l in range(2):
        # Model weights are divided into 8 chunks
        # (32768, 2048)
        self.weight[l] = np.concatenate([
            lstm_reader.get_tensor(f'lstm/lstm_{l}/W_{i}') for i in range(8)
        ]).astype(np.float32).T

        # (32768,)
        self.bias[l] = lstm_reader.get_tensor(f'lstm/lstm_{l}/B').astype(np.float32)

        # (8192, 1024)
        self.weight_P[l] = np.concatenate([
            lstm_reader.get_tensor(f'lstm/lstm_{l}/W_P_{i}') for i in range(8)
        ]).astype(np.float32)

        for p in ['F', 'I', 'O']:
            self.peepholes[l, p.lower()] = lstm_reader.get_tensor(
                f'lstm/lstm_{l}/W_{p}_diag').astype(np.float32)
    files = [
        os.path.join(checkpoint_dir, d) for d in os.listdir(checkpoint_dir)
    ]
    checkpoint_dir = files[0] if os.path.isdir(files[0]) else checkpoint_dir
elif args.checkpoint.endswith('.zip'):
    checkpoint_dir = args.tmp
    zipfile.ZipFile(args.checkpoint).extractall(checkpoint_dir)
    files = [
        os.path.join(checkpoint_dir, d) for d in os.listdir(checkpoint_dir)
    ]
    checkpoint_dir = files[0] if os.path.isdir(files[0]) else checkpoint_dir
else:
    checkpoint_dir = args.checkpoint

# read every tensor out of the checkpoint into a name -> ndarray dict
reader = NewCheckpointReader(
    tensorflow.train.latest_checkpoint(checkpoint_dir))
blobs = {k: reader.get_tensor(k) for k in reader.get_variable_to_shape_map()}

if args.output_path.endswith('.json'):
    with open(args.output_path, 'w') as f:
        json.dump({k: blob.tolist() for k, blob in blobs.items()}, f,
                  sort_keys=True, indent=2)
elif args.output_path.endswith('.h5'):
    import h5py
    with h5py.File(args.output_path, 'w') as h:
        h.update(**blobs)
elif args.output_path.endswith('.npz'):
    np.savez(args.output_path, **blobs)
elif args.output_path.endswith('.npy'):
    # np.save stores a single object, so persist the whole dict pickled
    np.save(args.output_path, blobs)
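A hedged usage sketch for the export step above, assuming the script is saved as convert_ckpt.py (a hypothetical name) with argparse flags matching the args.* attributes; an .npz export can then be inspected with plain NumPy.

# Hypothetical invocation of the script above:
#   python convert_ckpt.py --checkpoint model.zip --tmp /tmp/ckpt --output_path blobs.npz
import numpy as np

blobs = np.load('blobs.npz')
for name in blobs.files:
    print(name, blobs[name].shape, blobs[name].dtype)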