Source code for mmagic.models.editors.dcgan.dcgan_discriminator

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule, normal_init, update_init_info
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm

from mmagic.registry import MODELS


@MODELS.register_module()
[docs]class DCGANDiscriminator(BaseModule):
    """Discriminator for DCGAN.

    Implementation Details for DCGAN architecture:

    #. Adopt convolution in the discriminator;
    #. Use batchnorm in the discriminator except for the input and final \
       output layer;
    #. Use LeakyReLU in the discriminator in addition to the output layer.

    Args:
        input_scale (int): The scale of the input image.
        output_scale (int): The final scale of the convolutional feature.
        out_channels (int): The channel number of the final output layer.
        in_channels (int, optional): The channel number of the input image.
            Defaults to 3.
        base_channels (int, optional): The basic channel number of the
            generator. The other layers contains channels based on this number.
            Defaults to 128.
        default_norm_cfg (dict, optional): Norm config for all of layers
            except for the final output layer. Defaults to ``dict(type='BN')``.
        default_act_cfg (dict, optional): Activation config for all of layers
            except for the final output layer. Defaults to
            ``dict(type='ReLU')``.
        out_act_cfg (dict, optional): Activation config for the final output
            layer. Defaults to ``dict(type='Tanh')``.
        pretrained (str, optional): Path for the pretrained model. Default to
            ``None``.
        init_cfg (dict, optional): Initialization config dict. Default: None.
    """

    def __init__(self,
                 input_scale,
                 output_scale,
                 out_channels,
                 in_channels=3,
                 base_channels=128,
                 default_norm_cfg=dict(type='BN'),
                 default_act_cfg=dict(type='LeakyReLU'),
                 out_act_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.input_scale = input_scale
        self.output_scale = output_scale
        self.out_channels = out_channels
        self.base_channels = base_channels

        # the number of times for downsampling
        self.num_downsamples = int(np.log2(input_scale // output_scale))

        # build up downsampling backbone (excluding the output layer)
        downsamples = []
        curr_channels = in_channels
        for i in range(self.num_downsamples):
            # remove norm for the first conv
            norm_cfg_ = None if i == 0 else default_norm_cfg
            in_ch = in_channels if i == 0 else base_channels * 2**(i - 1)

            downsamples.append(
                ConvModule(
                    in_ch,
                    base_channels * 2**i,
                    kernel_size=4,
                    stride=2,
                    padding=1,
                    conv_cfg=dict(type='Conv2d'),
                    norm_cfg=norm_cfg_,
                    act_cfg=default_act_cfg))
            curr_channels = base_channels * 2**i

        self.downsamples = nn.Sequential(*downsamples)

        # define output layer
        self.output_layer = ConvModule(
            curr_channels,
            out_channels,
            kernel_size=4,
            stride=1,
            padding=0,
            conv_cfg=dict(type='Conv2d'),
            norm_cfg=None,
            act_cfg=out_act_cfg)

[docs]    def forward(self, x):
        """Forward function.

        Args:
            x (torch.Tensor): Fake or real image tensor.

        Returns:
            torch.Tensor: Prediction for the reality of the input image.
        """

        n = x.shape[0]
        x = self.downsamples(x)
        x = self.output_layer(x)

        # reshape to a flatten feature
        return x.view(n, -1)

[docs]    def init_weights(self):
        """Init weights for models.

        We just use the initialization method proposed in the original paper.
        """
        if self.init_cfg is not None and self.init_cfg['type'] == 'Pretrained':
            super().init_weights()
            return
        for m in self.modules():
            module_name = m.__class__.__name__
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                normal_init(m, 0, 0.02)
                init_info = (f'{module_name} belongs to (nn.Conv2d and '
                             'nn.ConvTranspose2d), initialize by normal '
                             'distribution with 0 mean and 0.02 std.')
            elif isinstance(m, _BatchNorm):
                nn.init.normal_(m.weight.data)
                nn.init.constant_(m.bias.data, 0)
                init_info = (f'{module_name} is BatchNorm, initialize weight '
                             'by normal discribution with unit mean and zero '
                             'std, and initialize bias as 0.')
            # save init info
            update_init_info(m, init_info)