def decode(self, quant_t, quant_b):
    upsample_t = self.upsample_t(quant_t)
    quant = torch.cat([upsample_t, quant_b], 1)
    dec = checkpoint(self.dec, quant)
    dec = checkpoint(self.final_conv, dec)
    return dec
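# All snippets in this section lean on gradient checkpointing: activations inside the
# wrapped module are dropped during the forward pass and recomputed during backward,
# trading extra compute for lower memory. A minimal sketch of the assumed shared
# context follows; the repo may route `checkpoint` through its own utility, but
# torch.utils.checkpoint is the standard source.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint

stage = nn.Sequential(nn.Conv2d(64, 64, 3, padding=1), nn.ReLU())
x = torch.randn(2, 64, 32, 32, requires_grad=True)  # input must require grad, or no gradients flow through the checkpoint
y = checkpoint(stage, x)  # output matches a plain forward; activations are rebuilt on backward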
def _forward_impl(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    l1 = checkpoint(self.layer1, x)
    l2 = checkpoint(self.layer2, l1)
    l3 = checkpoint(self.layer3, l2)
    l4 = checkpoint(self.layer4, l3)
    return l1, l2, l3, l4
def forward(self, x):
    b, *_ = x.shape
    quantize_loss = torch.zeros(1).to(x)
    for (block, attn_block, q_block) in zip(self.blocks, self.attn_blocks, self.quantize_blocks):
        if self.do_checkpointing:
            x = checkpoint(block, x)
        else:
            x = block(x)
        if exists(attn_block):
            x = attn_block(x)
        if exists(q_block):
            x, _, loss = q_block(x)
            quantize_loss += loss
    x = self.final_conv(x)
    x = self.flatten(x)
    x = self.to_logit(x)
    # q_block still holds the value from the final loop iteration; it decides
    # whether the quantization loss is returned alongside the logits.
    if exists(q_block):
        return x.squeeze(), quantize_loss
    else:
        return x.squeeze()
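# `exists` (used above and in the StyleGAN-style generators below) is a repo-local
# helper not shown in these excerpts. A common definition, assumed here:
def exists(val):
    # Treat only None as "absent"; zeros and empty tensors still count as present.
    return val is not None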
def forward(self, x, timesteps, low_res, correction_factors=None):
    emb = self.time_embed(timestep_embedding(timesteps, self.mid_channels))

    b, _, new_height, new_width = x.shape
    upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear")
    x = torch.cat([x, upsampled], dim=1)
    if correction_factors is not None:
        correction_factors = correction_factors.view(x.shape[0], -1, 1, 1).repeat(1, 1, new_height, new_width)
    else:
        correction_factors = torch.zeros((b, self.num_corruptions, new_height, new_width), dtype=torch.float, device=x.device)
    x = torch.cat([x, correction_factors], dim=1)

    d1 = self.input_block(x)
    d2 = self.down1(d1)
    feat = self.down2(d2)
    for bl in self.body:
        feat = checkpoint(bl, feat, emb)
    feat = feat[:, :self.mid_channels]
    feat = self.conv_body(feat)

    # upsample
    out = torch.cat([self.lrelu(self.normalize(self.conv_up1(F.interpolate(feat, scale_factor=2, mode='nearest')))), d2], dim=1)
    out = torch.cat([self.lrelu(self.normalize(self.conv_up2(F.interpolate(out, scale_factor=2, mode='nearest')))), d1], dim=1)
    out = self.conv_last(self.normalize(self.lrelu(self.conv_hr(out))))
    return out
def forward(self, x, already_quantized=False):
    """
    Expect input as shape [b, c, h, w]
    """
    if not already_quantized:
        x = self.quantize(x)
    # After quantization, x holds token indices with shape [length, batch].
    length, batch = x.shape

    h = self.token_embeddings(x)

    # prepend sos token
    sos = torch.ones(1, batch, self.embed_dim, device=x.device) * self.sos
    h = torch.cat([sos, h[:-1, :, :]], dim=0)

    # add positional embeddings
    positions = torch.arange(length, device=x.device).unsqueeze(-1)
    h = h + self.position_embeddings(positions).expand_as(h)

    # transformer
    for layer in self.layers:
        h = checkpoint(layer, h)
    h = self.ln_f(h)
    logits = self.head(h)
    return logits, x
def forward(self, styles, input_noise, structure_input=None, starting_shape=None):
    batch_size = styles.shape[0]
    image_size = self.image_size

    if self.no_const:
        avg_style = styles.mean(dim=1)[:, :, None, None]
        x = self.to_initial_block(avg_style)
    else:
        x = self.initial_block.expand(batch_size, -1, -1, -1)

    if starting_shape is not None:
        x = F.interpolate(x, size=starting_shape, mode="bilinear")

    rgb = None
    styles = styles.transpose(0, 1)
    x = self.initial_conv(x)

    if structure_input is not None:
        s = torch.nn.functional.interpolate(structure_input, size=x.shape[2:], mode="nearest")

    for style, block, attn in zip(styles, self.blocks, self.attns):
        if exists(attn):
            x = checkpoint(attn, x)
        if structure_input is not None:
            if exists(block.upsample):
                # In this case, the structural guidance is given by the extra information over the previous layer.
                twoX = (x.shape[2] * 2, x.shape[3] * 2)
                sn = torch.nn.functional.interpolate(structure_input, size=twoX, mode="nearest")
                s_int = torch.nn.functional.interpolate(s, size=twoX, mode="bilinear")
                s_diff = sn - s_int
            else:
                # This is the initial case - just feed in the base structure.
                s_diff = s
        else:
            s_diff = None
        x, rgb = checkpoint(block, x, rgb, style, input_noise, s_diff)

    return rgb
def forward(self, x, ref=None):
    switch_enc = checkpoint(self.resnet_encoder, F.interpolate(x, scale_factor=2, mode="bilinear"))
    x_lg = x
    feat = self.conv_first(x_lg)
    feat = sequential_checkpoint(self.body, self.num_blocks // self.blocks_per_checkpoint, feat)
    feat = feat[:, :self.reduce_ch]
    body_feat = checkpoint(self.conv_body, feat, switch_enc)
    feat = feat + body_feat

    # upsample
    out = self.lrelu(checkpoint(self.conv_up1, F.interpolate(feat, scale_factor=2, mode='nearest'), switch_enc))
    if self.scale >= 4:
        out = self.lrelu(checkpoint(self.conv_up2, F.interpolate(out, scale_factor=2, mode='nearest'), switch_enc))
        if self.scale >= 8:
            out = self.lrelu(self.conv_up3(F.interpolate(out, scale_factor=2, mode='nearest'), switch_enc))
    else:
        out = self.lrelu(checkpoint(self.conv_up2, out, switch_enc))
    out = checkpoint(self.conv_hr, out, switch_enc)
    out = checkpoint(self.conv_last, self.lrelu(out), switch_enc)
    return out
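# `sequential_checkpoint` (also used by the RRDB encoder below) is another utility
# not shown in these excerpts. PyTorch ships an equivalent,
# torch.utils.checkpoint.checkpoint_sequential; a hand-rolled sketch, assuming the
# helper partitions a module list and checkpoints each chunk as a unit:
import torch
from torch.utils.checkpoint import checkpoint

def sequential_checkpoint(modules, partitions, x):
    modules = list(modules)
    chunk = max(1, len(modules) // partitions)
    for i in range(0, len(modules), chunk):
        # Only the activations at chunk boundaries stay resident during forward.
        segment = torch.nn.Sequential(*modules[i:i + chunk])
        x = checkpoint(segment, x)
    return x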
def _forward_impl(self, x):
    # Should be the exact same implementation of torchvision.models.resnet.ResNet.forward_impl,
    # except using checkpoints on the body conv layers.
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = checkpoint(self.layer1, x)
    x = checkpoint(self.layer2, x)
    x = checkpoint(self.layer3, x)
    x = checkpoint(self.layer4, x)

    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.fc(x)
    return x
def forward(self, x, emb):
    """
    Apply the block to a Tensor, conditioned on a timestep embedding.

    :param x: an [N x C x ...] Tensor of features.
    :param emb: an [N x emb_channels] Tensor of timestep embeddings.
    :return: an [N x C x ...] Tensor of outputs.
    """
    return checkpoint(self._forward, x, emb)
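# `checkpoint` forwards any additional tensor arguments to the wrapped callable,
# which is what allows `_forward(x, emb)` to be wrapped directly above. A hedged
# usage sketch; `ResBlock` and its constructor arguments are hypothetical:
block = ResBlock(channels=64, emb_channels=256)
x = torch.randn(4, 64, 32, 32, requires_grad=True)
emb = torch.randn(4, 256)
out = block(x, emb)  # internally runs checkpoint(block._forward, x, emb)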
def encode(self, input):
    enc_b = checkpoint(self.enc_b, input)
    enc_t = checkpoint(self.enc_t, enc_b)

    quant_t = self.quantize_conv_t(enc_t).permute(0, 2, 3, 1)
    quant_t, diff_t, id_t = self.quantize_t(quant_t)
    quant_t = quant_t.permute(0, 3, 1, 2)
    diff_t = diff_t.unsqueeze(0)

    dec_t = checkpoint(self.dec_t, quant_t)
    enc_b = torch.cat([dec_t, enc_b], 1)

    quant_b = checkpoint(self.quantize_conv_b, enc_b).permute(0, 2, 3, 1)
    quant_b, diff_b, id_b = self.quantize_b(quant_b)
    quant_b = quant_b.permute(0, 3, 1, 2)
    diff_b = diff_b.unsqueeze(0)

    return quant_t, quant_b, diff_t + diff_b, id_t, id_b
def _forward_impl(self, x: Tensor, mask: Tensor) -> Tensor:
    # See note [TorchScript super()]
    x = self.conv1(x, mask)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    for m in [self.layer1, self.layer2, self.layer3, self.layer4]:
        m.masks = mask
    x = checkpoint(self.layer1, x)
    x = checkpoint(self.layer2, x)
    x = checkpoint(self.layer3, x)
    x = checkpoint(self.layer4, x)

    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.fc(x)
    return x
def forward(self, rrdb_fea, latent_bank_fea):
    fea = self.initial_conv(rrdb_fea)
    for i, block in enumerate(self.decoder_blocks):
        # The paper calls for PixelShuffle here, but I don't have good experience with that.
        # It also doesn't align with the way the underlying StyleGAN works.
        fea = nn.functional.interpolate(fea, scale_factor=2, mode="nearest")
        fea = torch.cat([fea, latent_bank_fea[i]], dim=1)
        fea = checkpoint(block, fea)
    return self.final_decode(fea)
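# The comment above trades the paper's PixelShuffle for nearest-neighbor upsampling.
# For reference, a minimal sketch of both options (channel counts are illustrative):
import torch
import torch.nn as nn
import torch.nn.functional as F

fea = torch.randn(1, 64, 16, 16)

# PixelShuffle route: a conv expands channels by r^2, then pixels are rearranged (r=2).
ps = nn.Sequential(nn.Conv2d(64, 64 * 4, 3, padding=1), nn.PixelShuffle(2))
up_ps = ps(fea)  # -> [1, 64, 32, 32]

# Interpolation route (taken above): resize first, convolve afterwards.
up_nn = F.interpolate(fea, scale_factor=2, mode="nearest")  # -> [1, 64, 32, 32]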
def _forward_impl(self, x):
    level = self.level_conv(x)
    x0 = self.relu(self.bn1(self.conv1(x)))
    x = self.maxpool(x0)

    x1 = checkpoint(self.layer1, x)
    x2 = checkpoint(self.layer2, x1)
    x3 = checkpoint(self.layer3, x2)
    x4 = checkpoint(self.layer4, x3)
    unused = self.avgpool(x4)  # This is performed for instance-level pixpro learning, even though it is unused.

    x = checkpoint(self.uplayers[0], x4)
    x = checkpoint(self.uplayers[1], x, x3)
    x = checkpoint(self.uplayers[2], x, x2)
    x = checkpoint(self.uplayers[3], x, x1)
    x = checkpoint(self.uplayers[4], x, x0)

    return checkpoint(self.tail, torch.cat([x, level], dim=1))
def forward(self, x):
    fea = self.initial_conv(x)
    fea = sequential_checkpoint(self.rrdb_blocks, len(self.rrdb_blocks), fea)
    rrdb_fea = fea

    convolutional_features = []
    for reducer in self.reducers:
        fea, f = checkpoint(reducer, fea)
        convolutional_features.append(f)

    latents = self.latent_conv(fea)
    latents = self.latent_linear(latents.flatten(1, -1)).view(fea.shape[0], self.latent_bank_blocks, -1)

    return rrdb_fea, convolutional_features, latents
def encode(self, gt, rrdbResults, logdet=0.0, epses=None, y_onehot=None):
    fl_fea = gt
    reverse = False
    level_conditionals = {}
    bypasses = {}

    L = opt_get(self.opt, ['networks', 'generator', 'flow', 'L'])
    for level in range(1, L + 1):
        bypasses[level] = torch.nn.functional.interpolate(gt, scale_factor=2**-level, mode='bilinear', align_corners=False)

    for layer, shape in zip(self.layers, self.output_shapes):
        size = shape[2]
        level = int(np.log(self.patch_sz / size) / np.log(2))
        # Cache the conditioning features once per level; levelToName maps levels > 0
        # onto keys of rrdbResults.
        if level > 0 and level not in level_conditionals.keys():
            level_conditionals[level] = rrdbResults[self.levelToName[level]]

        if isinstance(layer, FlowStep):
            fl_fea, logdet = checkpoint(layer, fl_fea, logdet, level_conditionals[level])
        elif isinstance(layer, Split2d):
            fl_fea, logdet = self.forward_split2d(epses, fl_fea, layer, logdet, reverse, level_conditionals[level], y_onehot=y_onehot)
        else:
            fl_fea, logdet = layer(fl_fea, logdet, reverse=reverse)

    z = fl_fea

    if not isinstance(epses, list):
        return z, logdet
    epses.append(z)
    return epses, logdet
def forward(self, lr, styles):
    b, c, h, w = lr.shape
    if self.transfer_mode:
        with torch.no_grad():
            x = self.encoder(lr)
    else:
        x = self.encoder(lr)

    styles = styles.transpose(0, 1)
    input_noise = torch.rand(b, h * self.scale, w * self.scale, 1).to(lr.device)

    if h != x.shape[-2]:
        rgb = F.interpolate(lr, size=x.shape[2:], mode="area")
    else:
        rgb = lr

    for style, block in zip(styles, self.blocks):
        x, rgb = checkpoint(block, x, rgb, style, input_noise)
    return rgb
def _forward_impl(self, x):
    # Should be the exact same implementation of torchvision.models.resnet.ResNet.forward_impl,
    # except using checkpoints on the body conv layers.
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x1 = checkpoint(self.layer1, x)
    x2 = checkpoint(self.layer2, x1)
    x3 = checkpoint(self.layer3, x2)
    x4 = checkpoint(self.layer4, x3)
    unused = self.avgpool(x4)  # This is performed for instance-level pixpro learning, even though it is unused.

    x = checkpoint(self.uplayers[0], x4)
    x = checkpoint(self.uplayers[1], x, x3)
    #x = checkpoint(self.uplayers[2], x, x2)
    #x = checkpoint(self.uplayers[3], x, x1)

    return checkpoint(self.tail, torch.cat([x, x2], dim=1))
def forward(self, x):
    return checkpoint(self._forward, x)
def forward(self, x):
    representation = self.get_representation(x)
    projector = self._get_projector(representation)
    projection = checkpoint(projector, representation)
    return projection
def forward(self, x):
    fea = self.initial_conv(x)
    for rrdb in self.rrdbs:
        fea = torch.cat([fea, checkpoint(rrdb, fea)], dim=1)
    return fea
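# Each iteration above concatenates the block output back onto its input, so the
# feature width grows with every RRDB (DenseNet-style) and each block must be built
# for a wider input than the last. A sketch of the implied channel arithmetic,
# assuming (hypothetically) every block emits a fixed `growth` channels:
initial_ch, growth, num_blocks = 64, 32, 4
ch = initial_ch
for i in range(num_blocks):
    print(f"rrdb {i}: in_channels={ch}, out_channels={growth}")
    ch += growth  # torch.cat along dim=1 widens the feature map
print("final feature channels:", ch)  # 64 + 4*32 = 192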