# Source code for mmagic.models.editors.dcgan.dcgan_generator
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule, normal_init, update_init_info
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmagic.registry import MODELS
from ...utils import get_module_device
@MODELS.register_module()
class DCGANGenerator(BaseModule):
    """Generator for DCGAN.

    Implementation Details for DCGAN architecture:

    #. Adopt transposed convolution in the generator;
    #. Use batchnorm in the generator except for the final output layer;
    #. Use ReLU in the generator except for the final output layer.

    More details can be found in the original paper:
    Unsupervised Representation Learning with Deep Convolutional
    Generative Adversarial Networks
    http://arxiv.org/abs/1511.06434

    The network is ``noise2feat`` (1x1 noise -> ``input_scale`` feature map),
    followed by ``num_upsamples - 1`` stride-2 transposed convolutions that
    halve the channel count each time, followed by one stride-2 output layer.

    Args:
        output_scale (int | tuple[int]): Output scale for the generated
            image. If only an integer is provided, the output image will
            be a square shape. The tuple of two integers will set the
            height and width for the output image, respectively.
        out_channels (int, optional): The channel number of the output
            feature. Default to 3.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this
            number. Default to 1024.
        input_scale (int | tuple[int], optional): Input scale for the
            feature ahead of the upsampling layers. If only an integer is
            provided, the input feature will be a square shape. The tuple
            of two integers will set the height and width for the input
            convolutional feature, respectively. It is used as the kernel
            size of the first transposed convolution. Defaults to 4.
        noise_size (int, optional): Size of the input noise
            vector. Defaults to 100.
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='BN')``.
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='ReLU')``.
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to ``dict(type='Tanh')``.
        init_cfg (dict, optional): Initialization config dict. Default: None.

    Raises:
        ValueError: If a scale is a sequence whose length is not 2, or if the
            height and width would need a different number of upsampling
            layers.
    """

    def __init__(self,
                 output_scale,
                 out_channels=3,
                 base_channels=1024,
                 input_scale=4,
                 noise_size=100,
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='ReLU'),
                 out_act_cfg=dict(type='Tanh'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.output_scale = output_scale
        self.base_channels = base_channels
        self.input_scale = input_scale
        self.noise_size = noise_size

        # Normalise both scales to (height, width) so that the documented
        # ``int | tuple[int]`` inputs are handled uniformly.
        out_hw = self._to_pair(output_scale, 'output_scale')
        in_hw = self._to_pair(input_scale, 'input_scale')

        # the number of times for upsampling; every stride-2 layer doubles
        # both sides, so height and width must agree on the count.
        stage_counts = {
            int(np.log2(out_side // in_side))
            for out_side, in_side in zip(out_hw, in_hw)
        }
        if len(stage_counts) != 1:
            raise ValueError(
                'Height and width must require the same number of '
                f'upsampling layers, but got output_scale={output_scale} '
                f'and input_scale={input_scale}')
        self.num_upsamples = stage_counts.pop()

        # A stride-1, padding-0 transposed convolution applied to a 1x1 input
        # yields a map as large as its kernel, so the kernel is the input
        # scale (4x4 with the default setting).
        self.noise2feat = ConvModule(
            noise_size,
            base_channels,
            kernel_size=in_hw,
            stride=1,
            padding=0,
            conv_cfg=dict(type='ConvTranspose2d'),
            norm_cfg=default_norm_cfg,
            act_cfg=default_act_cfg)

        # build up upsampling backbone (excluding the output layer)
        upsampling = []
        curr_channel = base_channels
        for _ in range(self.num_upsamples - 1):
            upsampling.append(
                ConvModule(
                    curr_channel,
                    curr_channel // 2,
                    kernel_size=4,
                    stride=2,
                    padding=1,
                    conv_cfg=dict(type='ConvTranspose2d'),
                    norm_cfg=default_norm_cfg,
                    act_cfg=default_act_cfg))
            curr_channel //= 2
        self.upsampling = nn.Sequential(*upsampling)

        # output layer: last x2 upsampling, no norm, ``out_act_cfg``
        self.output_layer = ConvModule(
            curr_channel,
            out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            conv_cfg=dict(type='ConvTranspose2d'),
            norm_cfg=None,
            act_cfg=out_act_cfg)

    @staticmethod
    def _to_pair(scale, name):
        """Return ``scale`` as a ``(height, width)`` tuple.

        Args:
            scale (int | tuple[int] | list[int]): A single side length or a
                pair of side lengths.
            name (str): Argument name used in the error message.

        Returns:
            tuple: ``(scale, scale)`` for a scalar, otherwise ``tuple(scale)``.

        Raises:
            ValueError: If ``scale`` is a sequence whose length is not 2.
        """
        if isinstance(scale, (tuple, list)):
            if len(scale) != 2:
                raise ValueError(
                    f'{name} should be an int or a pair of ints, '
                    f'but got {scale}')
            return tuple(scale)
        return (scale, scale)

    def forward(self, noise, num_batches=0, return_noise=False):
        """Forward function.

        Args:
            noise (torch.Tensor | callable | None): You can directly give a
                batch of noise through a ``torch.Tensor`` or offer a callable
                function to sample a batch of noise data. Otherwise, the
                ``None`` indicates to use the default noise sampler.
            num_batches (int, optional): The number of batch size. Must be
                positive when ``noise`` is not a tensor. Defaults to 0.
            return_noise (bool, optional): If True, ``noise_batch`` will be
                returned in a dict with ``fake_img``. Defaults to False.

        Returns:
            torch.Tensor | dict: If not ``return_noise``, only the output image
                will be returned. Otherwise, a dict contains ``fake_img`` and
                ``noise_batch`` will be returned.

        Raises:
            ValueError: If a noise tensor is neither 2-D nor 4-D.
            AssertionError: If the channel dimension of a noise tensor differs
                from ``noise_size``, or if ``num_batches`` is not positive
                when noise has to be sampled.
        """
        # receive noise and conduct sanity check.
        if isinstance(noise, torch.Tensor):
            # Check the rank before indexing ``shape[1]`` so that a 1-D
            # tensor gets the descriptive error instead of an IndexError.
            if noise.ndim not in (2, 4):
                raise ValueError('The noise should be in shape of (n, c) or '
                                 f'(n, c, 1, 1), but got {noise.shape}')
            assert noise.shape[1] == self.noise_size
            if noise.ndim == 2:
                noise_batch = noise[:, :, None, None]
            else:
                noise_batch = noise
        # receive a noise generator and sample noise.
        elif callable(noise):
            assert num_batches > 0
            noise_batch = noise((num_batches, self.noise_size, 1, 1))
        # otherwise, we will adopt default noise sampler.
        else:
            assert num_batches > 0
            noise_batch = torch.randn((num_batches, self.noise_size, 1, 1))

        # put the noise on the same device as the module parameters
        noise_batch = noise_batch.to(get_module_device(self))

        x = self.noise2feat(noise_batch)
        x = self.upsampling(x)
        x = self.output_layer(x)

        if return_noise:
            return dict(fake_img=x, noise_batch=noise_batch)
        return x

    def init_weights(self):
        """Init weights for models.

        When ``init_cfg`` requests ``'Pretrained'`` weights, initialization is
        delegated to the parent class. Otherwise convolution and transposed
        convolution layers are initialized with ``normal_init(m, 0, 0.02)``
        and batch-norm layers get a normally distributed weight and a zero
        bias.
        """
        if self.init_cfg is not None and self.init_cfg['type'] == 'Pretrained':
            super().init_weights()
            return
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                normal_init(m, 0, 0.02)
            elif isinstance(m, _BatchNorm):
                # NOTE(review): ``normal_`` defaults to mean 0 / std 1, while
                # common DCGAN references use N(1, 0.02) for the batch-norm
                # weight. Kept as is to preserve existing behavior - confirm.
                nn.init.normal_(m.weight.data)
                nn.init.constant_(m.bias.data, 0)
            # save init info. ``update_init_info`` expects the bookkeeping
            # attribute ``_params_init_info``, which is presumably only
            # attached when a parent ``BaseModule`` drives initialization;
            # skip the bookkeeping when this method is called standalone.
            if hasattr(m, '_params_init_info'):
                update_init_info(
                    m, f'Initialize {m.__class__.__name__} by '
                    f'\'init_type\' {self.init_cfg}.')